snowflake-ml-python 1.7.3__py3-none-any.whl → 1.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +19 -0
- snowflake/ml/_internal/platform_capabilities.py +87 -0
- snowflake/ml/dataset/dataset.py +0 -1
- snowflake/ml/fileset/fileset.py +6 -0
- snowflake/ml/jobs/__init__.py +21 -0
- snowflake/ml/jobs/_utils/constants.py +51 -0
- snowflake/ml/jobs/_utils/payload_utils.py +352 -0
- snowflake/ml/jobs/_utils/spec_utils.py +298 -0
- snowflake/ml/jobs/_utils/types.py +39 -0
- snowflake/ml/jobs/decorators.py +91 -0
- snowflake/ml/jobs/job.py +113 -0
- snowflake/ml/jobs/manager.py +298 -0
- snowflake/ml/model/_client/ops/model_ops.py +11 -2
- snowflake/ml/model/_client/ops/service_ops.py +1 -11
- snowflake/ml/model/_client/sql/service.py +13 -6
- snowflake/ml/model/_packager/model_handlers/_utils.py +12 -3
- snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +1 -0
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
- snowflake/ml/model/_signatures/base_handler.py +1 -2
- snowflake/ml/model/_signatures/builtins_handler.py +2 -2
- snowflake/ml/model/_signatures/numpy_handler.py +6 -7
- snowflake/ml/model/_signatures/pandas_handler.py +2 -2
- snowflake/ml/model/_signatures/pytorch_handler.py +2 -5
- snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
- snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
- snowflake/ml/model/model_signature.py +17 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
- snowflake/ml/modeling/cluster/birch.py +6 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
- snowflake/ml/modeling/cluster/dbscan.py +6 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
- snowflake/ml/modeling/cluster/k_means.py +6 -3
- snowflake/ml/modeling/cluster/mean_shift.py +6 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
- snowflake/ml/modeling/cluster/optics.py +6 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
- snowflake/ml/modeling/compose/column_transformer.py +6 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
- snowflake/ml/modeling/covariance/oas.py +6 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/pca.py +6 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
- snowflake/ml/modeling/impute/knn_imputer.py +6 -3
- snowflake/ml/modeling/impute/missing_indicator.py +6 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/lars.py +6 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/perceptron.py +6 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ridge.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
- snowflake/ml/modeling/manifold/isomap.py +6 -3
- snowflake/ml/modeling/manifold/mds.py +6 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
- snowflake/ml/modeling/manifold/tsne.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
- snowflake/ml/modeling/pipeline/pipeline.py +10 -2
- snowflake/ml/modeling/preprocessing/polynomial_features.py +6 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
- snowflake/ml/modeling/svm/linear_svc.py +6 -3
- snowflake/ml/modeling/svm/linear_svr.py +6 -3
- snowflake/ml/modeling/svm/nu_svc.py +6 -3
- snowflake/ml/modeling/svm/nu_svr.py +6 -3
- snowflake/ml/modeling/svm/svc.py +6 -3
- snowflake/ml/modeling/svm/svr.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +6 -3
- snowflake/ml/modeling/xgboost/xgb_regressor.py +6 -3
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +6 -3
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +6 -3
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/METADATA +29 -14
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/RECORD +187 -178
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/top_level.txt +0 -0
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class GradientBoostingClassifier(BaseTransformer):
|
61
64
|
r"""Gradient Boosting for classification
|
62
65
|
For more details on this class, see [sklearn.ensemble.GradientBoostingClassifier]
|
@@ -598,7 +601,7 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
598
601
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
599
602
|
expected_dtype = "array"
|
600
603
|
else:
|
601
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
604
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
602
605
|
# We can only infer the output types from the input types if the following two statements are true:
|
603
606
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
604
607
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1261,7 +1264,7 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
1261
1264
|
|
1262
1265
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1263
1266
|
|
1264
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1267
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1265
1268
|
outputs: List[BaseFeatureSpec] = []
|
1266
1269
|
if hasattr(self, "predict"):
|
1267
1270
|
# keep mypy happy
|
@@ -1269,7 +1272,7 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
1269
1272
|
# For classifier, the type of predict is the same as the type of label
|
1270
1273
|
if self._sklearn_object._estimator_type == "classifier":
|
1271
1274
|
# label columns is the desired type for output
|
1272
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1275
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1273
1276
|
# rename the output columns
|
1274
1277
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1275
1278
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class GradientBoostingRegressor(BaseTransformer):
|
61
64
|
r"""Gradient Boosting for regression
|
62
65
|
For more details on this class, see [sklearn.ensemble.GradientBoostingRegressor]
|
@@ -607,7 +610,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
607
610
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
608
611
|
expected_dtype = "array"
|
609
612
|
else:
|
610
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
613
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
611
614
|
# We can only infer the output types from the input types if the following two statements are true:
|
612
615
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
613
616
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1264,7 +1267,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
1264
1267
|
|
1265
1268
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1266
1269
|
|
1267
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1270
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1268
1271
|
outputs: List[BaseFeatureSpec] = []
|
1269
1272
|
if hasattr(self, "predict"):
|
1270
1273
|
# keep mypy happy
|
@@ -1272,7 +1275,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
1272
1275
|
# For classifier, the type of predict is the same as the type of label
|
1273
1276
|
if self._sklearn_object._estimator_type == "classifier":
|
1274
1277
|
# label columns is the desired type for output
|
1275
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1278
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1276
1279
|
# rename the output columns
|
1277
1280
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1278
1281
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class HistGradientBoostingClassifier(BaseTransformer):
|
61
64
|
r"""Histogram-based Gradient Boosting Classification Tree
|
62
65
|
For more details on this class, see [sklearn.ensemble.HistGradientBoostingClassifier]
|
@@ -590,7 +593,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
|
|
590
593
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
591
594
|
expected_dtype = "array"
|
592
595
|
else:
|
593
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
596
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
594
597
|
# We can only infer the output types from the input types if the following two statements are true:
|
595
598
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
596
599
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1253,7 +1256,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
|
|
1253
1256
|
|
1254
1257
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1255
1258
|
|
1256
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1259
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1257
1260
|
outputs: List[BaseFeatureSpec] = []
|
1258
1261
|
if hasattr(self, "predict"):
|
1259
1262
|
# keep mypy happy
|
@@ -1261,7 +1264,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
|
|
1261
1264
|
# For classifier, the type of predict is the same as the type of label
|
1262
1265
|
if self._sklearn_object._estimator_type == "classifier":
|
1263
1266
|
# label columns is the desired type for output
|
1264
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1267
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1265
1268
|
# rename the output columns
|
1266
1269
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1267
1270
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class HistGradientBoostingRegressor(BaseTransformer):
|
61
64
|
r"""Histogram-based Gradient Boosting Regression Tree
|
62
65
|
For more details on this class, see [sklearn.ensemble.HistGradientBoostingRegressor]
|
@@ -578,7 +581,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
|
|
578
581
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
579
582
|
expected_dtype = "array"
|
580
583
|
else:
|
581
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
584
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
582
585
|
# We can only infer the output types from the input types if the following two statements are true:
|
583
586
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
584
587
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1235,7 +1238,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
|
|
1235
1238
|
|
1236
1239
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1237
1240
|
|
1238
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1241
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1239
1242
|
outputs: List[BaseFeatureSpec] = []
|
1240
1243
|
if hasattr(self, "predict"):
|
1241
1244
|
# keep mypy happy
|
@@ -1243,7 +1246,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
|
|
1243
1246
|
# For classifier, the type of predict is the same as the type of label
|
1244
1247
|
if self._sklearn_object._estimator_type == "classifier":
|
1245
1248
|
# label columns is the desired type for output
|
1246
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1249
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1247
1250
|
# rename the output columns
|
1248
1251
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1249
1252
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class IsolationForest(BaseTransformer):
|
61
64
|
r"""Isolation Forest Algorithm
|
62
65
|
For more details on this class, see [sklearn.ensemble.IsolationForest]
|
@@ -470,7 +473,7 @@ class IsolationForest(BaseTransformer):
|
|
470
473
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
471
474
|
expected_dtype = "array"
|
472
475
|
else:
|
473
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
476
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
474
477
|
# We can only infer the output types from the input types if the following two statements are true:
|
475
478
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
476
479
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1131,7 +1134,7 @@ class IsolationForest(BaseTransformer):
|
|
1131
1134
|
|
1132
1135
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1133
1136
|
|
1134
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1137
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1135
1138
|
outputs: List[BaseFeatureSpec] = []
|
1136
1139
|
if hasattr(self, "predict"):
|
1137
1140
|
# keep mypy happy
|
@@ -1139,7 +1142,7 @@ class IsolationForest(BaseTransformer):
|
|
1139
1142
|
# For classifier, the type of predict is the same as the type of label
|
1140
1143
|
if self._sklearn_object._estimator_type == "classifier":
|
1141
1144
|
# label columns is the desired type for output
|
1142
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1145
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1143
1146
|
# rename the output columns
|
1144
1147
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1145
1148
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class RandomForestClassifier(BaseTransformer):
|
61
64
|
r"""A random forest classifier
|
62
65
|
For more details on this class, see [sklearn.ensemble.RandomForestClassifier]
|
@@ -599,7 +602,7 @@ class RandomForestClassifier(BaseTransformer):
|
|
599
602
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
600
603
|
expected_dtype = "array"
|
601
604
|
else:
|
602
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
605
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
603
606
|
# We can only infer the output types from the input types if the following two statements are true:
|
604
607
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
605
608
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1260,7 +1263,7 @@ class RandomForestClassifier(BaseTransformer):
|
|
1260
1263
|
|
1261
1264
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1262
1265
|
|
1263
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1266
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1264
1267
|
outputs: List[BaseFeatureSpec] = []
|
1265
1268
|
if hasattr(self, "predict"):
|
1266
1269
|
# keep mypy happy
|
@@ -1268,7 +1271,7 @@ class RandomForestClassifier(BaseTransformer):
|
|
1268
1271
|
# For classifier, the type of predict is the same as the type of label
|
1269
1272
|
if self._sklearn_object._estimator_type == "classifier":
|
1270
1273
|
# label columns is the desired type for output
|
1271
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1274
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1272
1275
|
# rename the output columns
|
1273
1276
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1274
1277
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class RandomForestRegressor(BaseTransformer):
|
61
64
|
r"""A random forest regressor
|
62
65
|
For more details on this class, see [sklearn.ensemble.RandomForestRegressor]
|
@@ -575,7 +578,7 @@ class RandomForestRegressor(BaseTransformer):
|
|
575
578
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
576
579
|
expected_dtype = "array"
|
577
580
|
else:
|
578
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
581
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
579
582
|
# We can only infer the output types from the input types if the following two statements are true:
|
580
583
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
581
584
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1232,7 +1235,7 @@ class RandomForestRegressor(BaseTransformer):
|
|
1232
1235
|
|
1233
1236
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1234
1237
|
|
1235
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1238
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1236
1239
|
outputs: List[BaseFeatureSpec] = []
|
1237
1240
|
if hasattr(self, "predict"):
|
1238
1241
|
# keep mypy happy
|
@@ -1240,7 +1243,7 @@ class RandomForestRegressor(BaseTransformer):
|
|
1240
1243
|
# For classifier, the type of predict is the same as the type of label
|
1241
1244
|
if self._sklearn_object._estimator_type == "classifier":
|
1242
1245
|
# label columns is the desired type for output
|
1243
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1246
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1244
1247
|
# rename the output columns
|
1245
1248
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1246
1249
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class StackingRegressor(BaseTransformer):
|
61
64
|
r"""Stack of estimators with a final regressor
|
62
65
|
For more details on this class, see [sklearn.ensemble.StackingRegressor]
|
@@ -462,7 +465,7 @@ class StackingRegressor(BaseTransformer):
|
|
462
465
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
463
466
|
expected_dtype = "array"
|
464
467
|
else:
|
465
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
468
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
466
469
|
# We can only infer the output types from the input types if the following two statements are true:
|
467
470
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
468
471
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1121,7 +1124,7 @@ class StackingRegressor(BaseTransformer):
|
|
1121
1124
|
|
1122
1125
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1123
1126
|
|
1124
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1127
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1125
1128
|
outputs: List[BaseFeatureSpec] = []
|
1126
1129
|
if hasattr(self, "predict"):
|
1127
1130
|
# keep mypy happy
|
@@ -1129,7 +1132,7 @@ class StackingRegressor(BaseTransformer):
|
|
1129
1132
|
# For classifier, the type of predict is the same as the type of label
|
1130
1133
|
if self._sklearn_object._estimator_type == "classifier":
|
1131
1134
|
# label columns is the desired type for output
|
1132
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1135
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1133
1136
|
# rename the output columns
|
1134
1137
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1135
1138
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class VotingClassifier(BaseTransformer):
|
61
64
|
r"""Soft Voting/Majority Rule classifier for unfitted estimators
|
62
65
|
For more details on this class, see [sklearn.ensemble.VotingClassifier]
|
@@ -444,7 +447,7 @@ class VotingClassifier(BaseTransformer):
|
|
444
447
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
445
448
|
expected_dtype = "array"
|
446
449
|
else:
|
447
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
450
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
448
451
|
# We can only infer the output types from the input types if the following two statements are true:
|
449
452
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
450
453
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1107,7 +1110,7 @@ class VotingClassifier(BaseTransformer):
|
|
1107
1110
|
|
1108
1111
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1109
1112
|
|
1110
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1113
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1111
1114
|
outputs: List[BaseFeatureSpec] = []
|
1112
1115
|
if hasattr(self, "predict"):
|
1113
1116
|
# keep mypy happy
|
@@ -1115,7 +1118,7 @@ class VotingClassifier(BaseTransformer):
|
|
1115
1118
|
# For classifier, the type of predict is the same as the type of label
|
1116
1119
|
if self._sklearn_object._estimator_type == "classifier":
|
1117
1120
|
# label columns is the desired type for output
|
1118
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1121
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1119
1122
|
# rename the output columns
|
1120
1123
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1121
1124
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class VotingRegressor(BaseTransformer):
|
61
64
|
r"""Prediction voting regressor for unfitted estimators
|
62
65
|
For more details on this class, see [sklearn.ensemble.VotingRegressor]
|
@@ -426,7 +429,7 @@ class VotingRegressor(BaseTransformer):
|
|
426
429
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
427
430
|
expected_dtype = "array"
|
428
431
|
else:
|
429
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
432
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
430
433
|
# We can only infer the output types from the input types if the following two statements are true:
|
431
434
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
432
435
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1085,7 +1088,7 @@ class VotingRegressor(BaseTransformer):
|
|
1085
1088
|
|
1086
1089
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1087
1090
|
|
1088
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1091
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1089
1092
|
outputs: List[BaseFeatureSpec] = []
|
1090
1093
|
if hasattr(self, "predict"):
|
1091
1094
|
# keep mypy happy
|
@@ -1093,7 +1096,7 @@ class VotingRegressor(BaseTransformer):
|
|
1093
1096
|
# For classifier, the type of predict is the same as the type of label
|
1094
1097
|
if self._sklearn_object._estimator_type == "classifier":
|
1095
1098
|
# label columns is the desired type for output
|
1096
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1099
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1097
1100
|
# rename the output columns
|
1098
1101
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1099
1102
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -38,6 +38,7 @@ from snowflake.ml.model.model_signature import (
|
|
38
38
|
FeatureSpec,
|
39
39
|
ModelSignature,
|
40
40
|
_infer_signature,
|
41
|
+
_truncate_data,
|
41
42
|
_rename_signature_with_snowflake_identifiers,
|
42
43
|
)
|
43
44
|
|
@@ -58,6 +59,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.feature_selection".repla
|
|
58
59
|
|
59
60
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
60
61
|
|
62
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
63
|
+
|
61
64
|
class GenericUnivariateSelect(BaseTransformer):
|
62
65
|
r"""Univariate feature selector with configurable strategy
|
63
66
|
For more details on this class, see [sklearn.feature_selection.GenericUnivariateSelect]
|
@@ -415,7 +418,7 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
415
418
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
416
419
|
expected_dtype = "array"
|
417
420
|
else:
|
418
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
421
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
419
422
|
# We can only infer the output types from the input types if the following two statements are true:
|
420
423
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
421
424
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1072,7 +1075,7 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
1072
1075
|
|
1073
1076
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1074
1077
|
|
1075
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1078
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1076
1079
|
outputs: List[BaseFeatureSpec] = []
|
1077
1080
|
if hasattr(self, "predict"):
|
1078
1081
|
# keep mypy happy
|
@@ -1080,7 +1083,7 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
1080
1083
|
# For classifier, the type of predict is the same as the type of label
|
1081
1084
|
if self._sklearn_object._estimator_type == "classifier":
|
1082
1085
|
# label columns is the desired type for output
|
1083
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1086
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1084
1087
|
# rename the output columns
|
1085
1088
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1086
1089
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -38,6 +38,7 @@ from snowflake.ml.model.model_signature import (
|
|
38
38
|
FeatureSpec,
|
39
39
|
ModelSignature,
|
40
40
|
_infer_signature,
|
41
|
+
_truncate_data,
|
41
42
|
_rename_signature_with_snowflake_identifiers,
|
42
43
|
)
|
43
44
|
|
@@ -58,6 +59,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.feature_selection".repla
|
|
58
59
|
|
59
60
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
60
61
|
|
62
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
63
|
+
|
61
64
|
class SelectFdr(BaseTransformer):
|
62
65
|
r"""Filter: Select the p-values for an estimated false discovery rate
|
63
66
|
For more details on this class, see [sklearn.feature_selection.SelectFdr]
|
@@ -410,7 +413,7 @@ class SelectFdr(BaseTransformer):
|
|
410
413
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
411
414
|
expected_dtype = "array"
|
412
415
|
else:
|
413
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
416
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
414
417
|
# We can only infer the output types from the input types if the following two statements are true:
|
415
418
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
416
419
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1067,7 +1070,7 @@ class SelectFdr(BaseTransformer):
|
|
1067
1070
|
|
1068
1071
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1069
1072
|
|
1070
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1073
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1071
1074
|
outputs: List[BaseFeatureSpec] = []
|
1072
1075
|
if hasattr(self, "predict"):
|
1073
1076
|
# keep mypy happy
|
@@ -1075,7 +1078,7 @@ class SelectFdr(BaseTransformer):
|
|
1075
1078
|
# For classifier, the type of predict is the same as the type of label
|
1076
1079
|
if self._sklearn_object._estimator_type == "classifier":
|
1077
1080
|
# label columns is the desired type for output
|
1078
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1081
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1079
1082
|
# rename the output columns
|
1080
1083
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1081
1084
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -38,6 +38,7 @@ from snowflake.ml.model.model_signature import (
|
|
38
38
|
FeatureSpec,
|
39
39
|
ModelSignature,
|
40
40
|
_infer_signature,
|
41
|
+
_truncate_data,
|
41
42
|
_rename_signature_with_snowflake_identifiers,
|
42
43
|
)
|
43
44
|
|
@@ -58,6 +59,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.feature_selection".repla
|
|
58
59
|
|
59
60
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
60
61
|
|
62
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
63
|
+
|
61
64
|
class SelectFpr(BaseTransformer):
|
62
65
|
r"""Filter: Select the pvalues below alpha based on a FPR test
|
63
66
|
For more details on this class, see [sklearn.feature_selection.SelectFpr]
|
@@ -410,7 +413,7 @@ class SelectFpr(BaseTransformer):
|
|
410
413
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
411
414
|
expected_dtype = "array"
|
412
415
|
else:
|
413
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
416
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
414
417
|
# We can only infer the output types from the input types if the following two statements are true:
|
415
418
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
416
419
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1067,7 +1070,7 @@ class SelectFpr(BaseTransformer):
|
|
1067
1070
|
|
1068
1071
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1069
1072
|
|
1070
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1073
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1071
1074
|
outputs: List[BaseFeatureSpec] = []
|
1072
1075
|
if hasattr(self, "predict"):
|
1073
1076
|
# keep mypy happy
|
@@ -1075,7 +1078,7 @@ class SelectFpr(BaseTransformer):
|
|
1075
1078
|
# For classifier, the type of predict is the same as the type of label
|
1076
1079
|
if self._sklearn_object._estimator_type == "classifier":
|
1077
1080
|
# label columns is the desired type for output
|
1078
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1081
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1079
1082
|
# rename the output columns
|
1080
1083
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1081
1084
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -38,6 +38,7 @@ from snowflake.ml.model.model_signature import (
|
|
38
38
|
FeatureSpec,
|
39
39
|
ModelSignature,
|
40
40
|
_infer_signature,
|
41
|
+
_truncate_data,
|
41
42
|
_rename_signature_with_snowflake_identifiers,
|
42
43
|
)
|
43
44
|
|
@@ -58,6 +59,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.feature_selection".repla
|
|
58
59
|
|
59
60
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
60
61
|
|
62
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
63
|
+
|
61
64
|
class SelectFwe(BaseTransformer):
|
62
65
|
r"""Filter: Select the p-values corresponding to Family-wise error rate
|
63
66
|
For more details on this class, see [sklearn.feature_selection.SelectFwe]
|
@@ -410,7 +413,7 @@ class SelectFwe(BaseTransformer):
|
|
410
413
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
411
414
|
expected_dtype = "array"
|
412
415
|
else:
|
413
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
416
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
414
417
|
# We can only infer the output types from the input types if the following two statements are true:
|
415
418
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
416
419
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1067,7 +1070,7 @@ class SelectFwe(BaseTransformer):
|
|
1067
1070
|
|
1068
1071
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1069
1072
|
|
1070
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1073
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1071
1074
|
outputs: List[BaseFeatureSpec] = []
|
1072
1075
|
if hasattr(self, "predict"):
|
1073
1076
|
# keep mypy happy
|
@@ -1075,7 +1078,7 @@ class SelectFwe(BaseTransformer):
|
|
1075
1078
|
# For classifier, the type of predict is the same as the type of label
|
1076
1079
|
if self._sklearn_object._estimator_type == "classifier":
|
1077
1080
|
# label columns is the desired type for output
|
1078
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1081
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1079
1082
|
# rename the output columns
|
1080
1083
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1081
1084
|
self._model_signature_dict["predict"] = ModelSignature(
|