snowflake-ml-python 1.7.3__py3-none-any.whl → 1.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +19 -0
- snowflake/ml/_internal/env_utils.py +64 -21
- snowflake/ml/_internal/platform_capabilities.py +87 -0
- snowflake/ml/_internal/relax_version_strategy.py +16 -0
- snowflake/ml/_internal/telemetry.py +21 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +1 -1
- snowflake/ml/dataset/dataset.py +0 -1
- snowflake/ml/feature_store/feature_store.py +18 -0
- snowflake/ml/feature_store/feature_view.py +46 -1
- snowflake/ml/fileset/fileset.py +6 -0
- snowflake/ml/jobs/__init__.py +21 -0
- snowflake/ml/jobs/_utils/constants.py +57 -0
- snowflake/ml/jobs/_utils/payload_utils.py +438 -0
- snowflake/ml/jobs/_utils/spec_utils.py +296 -0
- snowflake/ml/jobs/_utils/types.py +39 -0
- snowflake/ml/jobs/decorators.py +71 -0
- snowflake/ml/jobs/job.py +113 -0
- snowflake/ml/jobs/manager.py +298 -0
- snowflake/ml/model/_client/ops/model_ops.py +11 -2
- snowflake/ml/model/_client/ops/service_ops.py +1 -11
- snowflake/ml/model/_client/sql/service.py +13 -6
- snowflake/ml/model/_packager/model_env/model_env.py +45 -28
- snowflake/ml/model/_packager/model_handlers/_utils.py +19 -6
- snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +17 -0
- snowflake/ml/model/_packager/model_handlers/keras.py +230 -0
- snowflake/ml/model/_packager/model_handlers/pytorch.py +1 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +28 -3
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +74 -21
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +27 -49
- snowflake/ml/model/_packager/model_handlers_migrator/tensorflow_migrator_2023_12_01.py +48 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +3 -0
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -1
- snowflake/ml/model/_packager/model_task/model_task_utils.py +5 -1
- snowflake/ml/model/_signatures/base_handler.py +1 -2
- snowflake/ml/model/_signatures/builtins_handler.py +2 -2
- snowflake/ml/model/_signatures/core.py +2 -2
- snowflake/ml/model/_signatures/numpy_handler.py +11 -12
- snowflake/ml/model/_signatures/pandas_handler.py +11 -9
- snowflake/ml/model/_signatures/pytorch_handler.py +3 -6
- snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
- snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
- snowflake/ml/model/model_signature.py +25 -4
- snowflake/ml/model/type_hints.py +15 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +14 -1
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
- snowflake/ml/modeling/cluster/birch.py +6 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
- snowflake/ml/modeling/cluster/dbscan.py +6 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
- snowflake/ml/modeling/cluster/k_means.py +6 -3
- snowflake/ml/modeling/cluster/mean_shift.py +6 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
- snowflake/ml/modeling/cluster/optics.py +6 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
- snowflake/ml/modeling/compose/column_transformer.py +6 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
- snowflake/ml/modeling/covariance/oas.py +6 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/pca.py +6 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
- snowflake/ml/modeling/impute/knn_imputer.py +6 -3
- snowflake/ml/modeling/impute/missing_indicator.py +6 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/lars.py +6 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/perceptron.py +6 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ridge.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
- snowflake/ml/modeling/manifold/isomap.py +6 -3
- snowflake/ml/modeling/manifold/mds.py +6 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
- snowflake/ml/modeling/manifold/tsne.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
- snowflake/ml/modeling/pipeline/pipeline.py +28 -3
- snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -5
- snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
- snowflake/ml/modeling/svm/linear_svc.py +6 -3
- snowflake/ml/modeling/svm/linear_svr.py +6 -3
- snowflake/ml/modeling/svm/nu_svc.py +6 -3
- snowflake/ml/modeling/svm/nu_svr.py +6 -3
- snowflake/ml/modeling/svm/svc.py +6 -3
- snowflake/ml/modeling/svm/svr.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +6 -3
- snowflake/ml/modeling/xgboost/xgb_regressor.py +6 -3
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +6 -3
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +6 -3
- snowflake/ml/registry/registry.py +34 -4
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/METADATA +81 -33
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/RECORD +208 -196
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/WHEEL +1 -1
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/top_level.txt +0 -0
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.neighbors".replace("skle
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class RadiusNeighborsClassifier(BaseTransformer):
|
61
64
|
r"""Classifier implementing a vote among neighbors within a given radius
|
62
65
|
For more details on this class, see [sklearn.neighbors.RadiusNeighborsClassifier]
|
@@ -495,7 +498,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
495
498
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
496
499
|
expected_dtype = "array"
|
497
500
|
else:
|
498
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
501
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
499
502
|
# We can only infer the output types from the input types if the following two statements are true:
|
500
503
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
501
504
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1156,7 +1159,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
1156
1159
|
|
1157
1160
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1158
1161
|
|
1159
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1162
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1160
1163
|
outputs: List[BaseFeatureSpec] = []
|
1161
1164
|
if hasattr(self, "predict"):
|
1162
1165
|
# keep mypy happy
|
@@ -1164,7 +1167,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
1164
1167
|
# For classifier, the type of predict is the same as the type of label
|
1165
1168
|
if self._sklearn_object._estimator_type == "classifier":
|
1166
1169
|
# label columns is the desired type for output
|
1167
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1170
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1168
1171
|
# rename the output columns
|
1169
1172
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1170
1173
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.neighbors".replace("skle
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class RadiusNeighborsRegressor(BaseTransformer):
|
61
64
|
r"""Regression based on neighbors within a fixed radius
|
62
65
|
For more details on this class, see [sklearn.neighbors.RadiusNeighborsRegressor]
|
@@ -480,7 +483,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
|
|
480
483
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
481
484
|
expected_dtype = "array"
|
482
485
|
else:
|
483
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
486
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
484
487
|
# We can only infer the output types from the input types if the following two statements are true:
|
485
488
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
486
489
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1137,7 +1140,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
|
|
1137
1140
|
|
1138
1141
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1139
1142
|
|
1140
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1143
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1141
1144
|
outputs: List[BaseFeatureSpec] = []
|
1142
1145
|
if hasattr(self, "predict"):
|
1143
1146
|
# keep mypy happy
|
@@ -1145,7 +1148,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
|
|
1145
1148
|
# For classifier, the type of predict is the same as the type of label
|
1146
1149
|
if self._sklearn_object._estimator_type == "classifier":
|
1147
1150
|
# label columns is the desired type for output
|
1148
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1151
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1149
1152
|
# rename the output columns
|
1150
1153
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1151
1154
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.neural_network".replace(
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class BernoulliRBM(BaseTransformer):
|
61
64
|
r"""Bernoulli Restricted Boltzmann Machine (RBM)
|
62
65
|
For more details on this class, see [sklearn.neural_network.BernoulliRBM]
|
@@ -439,7 +442,7 @@ class BernoulliRBM(BaseTransformer):
|
|
439
442
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
440
443
|
expected_dtype = "array"
|
441
444
|
else:
|
442
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
445
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
443
446
|
# We can only infer the output types from the input types if the following two statements are true:
|
444
447
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
445
448
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1098,7 +1101,7 @@ class BernoulliRBM(BaseTransformer):
|
|
1098
1101
|
|
1099
1102
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1100
1103
|
|
1101
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1104
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1102
1105
|
outputs: List[BaseFeatureSpec] = []
|
1103
1106
|
if hasattr(self, "predict"):
|
1104
1107
|
# keep mypy happy
|
@@ -1106,7 +1109,7 @@ class BernoulliRBM(BaseTransformer):
|
|
1106
1109
|
# For classifier, the type of predict is the same as the type of label
|
1107
1110
|
if self._sklearn_object._estimator_type == "classifier":
|
1108
1111
|
# label columns is the desired type for output
|
1109
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1112
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1110
1113
|
# rename the output columns
|
1111
1114
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1112
1115
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.neural_network".replace(
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class MLPClassifier(BaseTransformer):
|
61
64
|
r"""Multi-layer Perceptron classifier
|
62
65
|
For more details on this class, see [sklearn.neural_network.MLPClassifier]
|
@@ -598,7 +601,7 @@ class MLPClassifier(BaseTransformer):
|
|
598
601
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
599
602
|
expected_dtype = "array"
|
600
603
|
else:
|
601
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
604
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
602
605
|
# We can only infer the output types from the input types if the following two statements are true:
|
603
606
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
604
607
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1259,7 +1262,7 @@ class MLPClassifier(BaseTransformer):
|
|
1259
1262
|
|
1260
1263
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1261
1264
|
|
1262
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1265
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1263
1266
|
outputs: List[BaseFeatureSpec] = []
|
1264
1267
|
if hasattr(self, "predict"):
|
1265
1268
|
# keep mypy happy
|
@@ -1267,7 +1270,7 @@ class MLPClassifier(BaseTransformer):
|
|
1267
1270
|
# For classifier, the type of predict is the same as the type of label
|
1268
1271
|
if self._sklearn_object._estimator_type == "classifier":
|
1269
1272
|
# label columns is the desired type for output
|
1270
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1273
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1271
1274
|
# rename the output columns
|
1272
1275
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1273
1276
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.neural_network".replace(
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class MLPRegressor(BaseTransformer):
|
61
64
|
r"""Multi-layer Perceptron regressor
|
62
65
|
For more details on this class, see [sklearn.neural_network.MLPRegressor]
|
@@ -591,7 +594,7 @@ class MLPRegressor(BaseTransformer):
|
|
591
594
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
592
595
|
expected_dtype = "array"
|
593
596
|
else:
|
594
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
597
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
595
598
|
# We can only infer the output types from the input types if the following two statements are true:
|
596
599
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
597
600
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1248,7 +1251,7 @@ class MLPRegressor(BaseTransformer):
|
|
1248
1251
|
|
1249
1252
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1250
1253
|
|
1251
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1254
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1252
1255
|
outputs: List[BaseFeatureSpec] = []
|
1253
1256
|
if hasattr(self, "predict"):
|
1254
1257
|
# keep mypy happy
|
@@ -1256,7 +1259,7 @@ class MLPRegressor(BaseTransformer):
|
|
1256
1259
|
# For classifier, the type of predict is the same as the type of label
|
1257
1260
|
if self._sklearn_object._estimator_type == "classifier":
|
1258
1261
|
# label columns is the desired type for output
|
1259
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1262
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1260
1263
|
# rename the output columns
|
1261
1264
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1262
1265
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -20,7 +20,11 @@ from snowflake.ml._internal.exceptions import error_codes, exceptions
|
|
20
20
|
from snowflake.ml._internal.lineage import lineage_utils
|
21
21
|
from snowflake.ml._internal.utils import snowpark_dataframe_utils, temp_file_utils
|
22
22
|
from snowflake.ml.data import data_source
|
23
|
-
from snowflake.ml.model.model_signature import
|
23
|
+
from snowflake.ml.model.model_signature import (
|
24
|
+
ModelSignature,
|
25
|
+
_infer_signature,
|
26
|
+
_truncate_data,
|
27
|
+
)
|
24
28
|
from snowflake.ml.modeling._internal.model_transformer_builder import (
|
25
29
|
ModelTransformerBuilder,
|
26
30
|
)
|
@@ -31,6 +35,8 @@ from snowflake.snowpark._internal import utils as snowpark_utils
|
|
31
35
|
_PROJECT = "ModelDevelopment"
|
32
36
|
_SUBPROJECT = "Framework"
|
33
37
|
|
38
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
39
|
+
|
34
40
|
|
35
41
|
def _final_step_has(attr: str) -> Callable[..., bool]:
|
36
42
|
"""Check that final_estimator has `attr`. Used together with `available_if` in `Pipeline`."""
|
@@ -848,6 +854,7 @@ class Pipeline(base.BaseTransformer):
|
|
848
854
|
# Create a fitted sklearn pipeline object by translating each non-estimator step in pipeline with
|
849
855
|
# a fitted column transformer.
|
850
856
|
sksteps = []
|
857
|
+
i = 0
|
851
858
|
for i, (name, trans) in enumerate(self._get_transformers()):
|
852
859
|
if isinstance(trans, base.BaseTransformer):
|
853
860
|
trans = self._construct_fitted_column_transformer_object(
|
@@ -885,13 +892,31 @@ class Pipeline(base.BaseTransformer):
|
|
885
892
|
self._model_signature_dict = dict()
|
886
893
|
|
887
894
|
input_columns = self._get_sanitized_list_of_columns(dataset.columns)
|
888
|
-
inputs_signature = _infer_signature(
|
895
|
+
inputs_signature = _infer_signature(
|
896
|
+
_truncate_data(dataset[input_columns], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True
|
897
|
+
)
|
889
898
|
|
890
899
|
estimator_step = self._get_estimator()
|
891
900
|
if estimator_step:
|
892
901
|
estimator_signatures = estimator_step[1].model_signatures
|
893
902
|
for method, signature in estimator_signatures.items():
|
894
|
-
|
903
|
+
# Add the inferred input signature to the model signature dictionary for each method
|
904
|
+
self._model_signature_dict[method] = ModelSignature(
|
905
|
+
inputs=inputs_signature,
|
906
|
+
outputs=(
|
907
|
+
# If _drop_input_cols is True, do not include any input columns in the output signature
|
908
|
+
[]
|
909
|
+
if self._drop_input_cols
|
910
|
+
else [
|
911
|
+
# Include input columns in the output signature if they are not already present
|
912
|
+
# Those already present means they are overwritten by the output of the estimator
|
913
|
+
spec
|
914
|
+
for spec in inputs_signature
|
915
|
+
if spec.name not in [_spec.name for _spec in signature.outputs]
|
916
|
+
]
|
917
|
+
)
|
918
|
+
+ signature.outputs, # Append the existing output signature
|
919
|
+
)
|
895
920
|
|
896
921
|
@property
|
897
922
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.preprocessing".replace("
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class PolynomialFeatures(BaseTransformer):
|
61
64
|
r"""Generate polynomial and interaction features
|
62
65
|
For more details on this class, see [sklearn.preprocessing.PolynomialFeatures]
|
@@ -334,7 +337,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
334
337
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
335
338
|
|
336
339
|
if isinstance(dataset, DataFrame):
|
337
|
-
expected_type_inferred = ""
|
340
|
+
expected_type_inferred = "float"
|
338
341
|
# when it is classifier, infer the datatype from label columns
|
339
342
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
340
343
|
# Batch inference takes a single expected output column type. Use the first column's type for now.
|
@@ -412,7 +415,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
412
415
|
# are specific to the type of dataset used.
|
413
416
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
414
417
|
if isinstance(dataset, DataFrame):
|
415
|
-
expected_dtype = ""
|
418
|
+
expected_dtype = "float"
|
416
419
|
if False: # is child of _BaseHeterogeneousEnsemble
|
417
420
|
# transform() method of HeterogeneousEnsemble estimators return responses of varying shapes
|
418
421
|
# from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
|
@@ -429,7 +432,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
429
432
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
430
433
|
expected_dtype = "array"
|
431
434
|
else:
|
432
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
435
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
433
436
|
# We can only infer the output types from the input types if the following two statements are true:
|
434
437
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
435
438
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1086,7 +1089,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
1086
1089
|
|
1087
1090
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1088
1091
|
|
1089
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1092
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1090
1093
|
outputs: List[BaseFeatureSpec] = []
|
1091
1094
|
if hasattr(self, "predict"):
|
1092
1095
|
# keep mypy happy
|
@@ -1094,7 +1097,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
1094
1097
|
# For classifier, the type of predict is the same as the type of label
|
1095
1098
|
if self._sklearn_object._estimator_type == "classifier":
|
1096
1099
|
# label columns is the desired type for output
|
1097
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1100
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1098
1101
|
# rename the output columns
|
1099
1102
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1100
1103
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.semi_supervised".replace
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class LabelPropagation(BaseTransformer):
|
61
64
|
r"""Label Propagation classifier
|
62
65
|
For more details on this class, see [sklearn.semi_supervised.LabelPropagation]
|
@@ -433,7 +436,7 @@ class LabelPropagation(BaseTransformer):
|
|
433
436
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
434
437
|
expected_dtype = "array"
|
435
438
|
else:
|
436
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
439
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
437
440
|
# We can only infer the output types from the input types if the following two statements are true:
|
438
441
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
439
442
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1094,7 +1097,7 @@ class LabelPropagation(BaseTransformer):
|
|
1094
1097
|
|
1095
1098
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1096
1099
|
|
1097
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1100
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1098
1101
|
outputs: List[BaseFeatureSpec] = []
|
1099
1102
|
if hasattr(self, "predict"):
|
1100
1103
|
# keep mypy happy
|
@@ -1102,7 +1105,7 @@ class LabelPropagation(BaseTransformer):
|
|
1102
1105
|
# For classifier, the type of predict is the same as the type of label
|
1103
1106
|
if self._sklearn_object._estimator_type == "classifier":
|
1104
1107
|
# label columns is the desired type for output
|
1105
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1108
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1106
1109
|
# rename the output columns
|
1107
1110
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1108
1111
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.semi_supervised".replace
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class LabelSpreading(BaseTransformer):
|
61
64
|
r"""LabelSpreading model for semi-supervised learning
|
62
65
|
For more details on this class, see [sklearn.semi_supervised.LabelSpreading]
|
@@ -442,7 +445,7 @@ class LabelSpreading(BaseTransformer):
|
|
442
445
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
443
446
|
expected_dtype = "array"
|
444
447
|
else:
|
445
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
448
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
446
449
|
# We can only infer the output types from the input types if the following two statements are true:
|
447
450
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
448
451
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1103,7 +1106,7 @@ class LabelSpreading(BaseTransformer):
|
|
1103
1106
|
|
1104
1107
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1105
1108
|
|
1106
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1109
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1107
1110
|
outputs: List[BaseFeatureSpec] = []
|
1108
1111
|
if hasattr(self, "predict"):
|
1109
1112
|
# keep mypy happy
|
@@ -1111,7 +1114,7 @@ class LabelSpreading(BaseTransformer):
|
|
1111
1114
|
# For classifier, the type of predict is the same as the type of label
|
1112
1115
|
if self._sklearn_object._estimator_type == "classifier":
|
1113
1116
|
# label columns is the desired type for output
|
1114
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1117
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1115
1118
|
# rename the output columns
|
1116
1119
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1117
1120
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class LinearSVC(BaseTransformer):
|
61
64
|
r"""Linear Support Vector Classification
|
62
65
|
For more details on this class, see [sklearn.svm.LinearSVC]
|
@@ -507,7 +510,7 @@ class LinearSVC(BaseTransformer):
|
|
507
510
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
508
511
|
expected_dtype = "array"
|
509
512
|
else:
|
510
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
513
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
511
514
|
# We can only infer the output types from the input types if the following two statements are true:
|
512
515
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
513
516
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1166,7 +1169,7 @@ class LinearSVC(BaseTransformer):
|
|
1166
1169
|
|
1167
1170
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1168
1171
|
|
1169
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1172
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1170
1173
|
outputs: List[BaseFeatureSpec] = []
|
1171
1174
|
if hasattr(self, "predict"):
|
1172
1175
|
# keep mypy happy
|
@@ -1174,7 +1177,7 @@ class LinearSVC(BaseTransformer):
|
|
1174
1177
|
# For classifier, the type of predict is the same as the type of label
|
1175
1178
|
if self._sklearn_object._estimator_type == "classifier":
|
1176
1179
|
# label columns is the desired type for output
|
1177
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1180
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1178
1181
|
# rename the output columns
|
1179
1182
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1180
1183
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class LinearSVR(BaseTransformer):
|
61
64
|
r"""Linear Support Vector Regression
|
62
65
|
For more details on this class, see [sklearn.svm.LinearSVR]
|
@@ -476,7 +479,7 @@ class LinearSVR(BaseTransformer):
|
|
476
479
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
477
480
|
expected_dtype = "array"
|
478
481
|
else:
|
479
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
482
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
480
483
|
# We can only infer the output types from the input types if the following two statements are true:
|
481
484
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
482
485
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1133,7 +1136,7 @@ class LinearSVR(BaseTransformer):
|
|
1133
1136
|
|
1134
1137
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1135
1138
|
|
1136
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1139
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1137
1140
|
outputs: List[BaseFeatureSpec] = []
|
1138
1141
|
if hasattr(self, "predict"):
|
1139
1142
|
# keep mypy happy
|
@@ -1141,7 +1144,7 @@ class LinearSVR(BaseTransformer):
|
|
1141
1144
|
# For classifier, the type of predict is the same as the type of label
|
1142
1145
|
if self._sklearn_object._estimator_type == "classifier":
|
1143
1146
|
# label columns is the desired type for output
|
1144
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1147
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1145
1148
|
# rename the output columns
|
1146
1149
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1147
1150
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class NuSVC(BaseTransformer):
|
61
64
|
r"""Nu-Support Vector Classification
|
62
65
|
For more details on this class, see [sklearn.svm.NuSVC]
|
@@ -506,7 +509,7 @@ class NuSVC(BaseTransformer):
|
|
506
509
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
507
510
|
expected_dtype = "array"
|
508
511
|
else:
|
509
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
512
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
510
513
|
# We can only infer the output types from the input types if the following two statements are true:
|
511
514
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
512
515
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1169,7 +1172,7 @@ class NuSVC(BaseTransformer):
|
|
1169
1172
|
|
1170
1173
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1171
1174
|
|
1172
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1175
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1173
1176
|
outputs: List[BaseFeatureSpec] = []
|
1174
1177
|
if hasattr(self, "predict"):
|
1175
1178
|
# keep mypy happy
|
@@ -1177,7 +1180,7 @@ class NuSVC(BaseTransformer):
|
|
1177
1180
|
# For classifier, the type of predict is the same as the type of label
|
1178
1181
|
if self._sklearn_object._estimator_type == "classifier":
|
1179
1182
|
# label columns is the desired type for output
|
1180
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1183
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1181
1184
|
# rename the output columns
|
1182
1185
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1183
1186
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class NuSVR(BaseTransformer):
|
61
64
|
r"""Nu Support Vector Regression
|
62
65
|
For more details on this class, see [sklearn.svm.NuSVR]
|
@@ -467,7 +470,7 @@ class NuSVR(BaseTransformer):
|
|
467
470
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
468
471
|
expected_dtype = "array"
|
469
472
|
else:
|
470
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
473
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
471
474
|
# We can only infer the output types from the input types if the following two statements are true:
|
472
475
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
473
476
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1124,7 +1127,7 @@ class NuSVR(BaseTransformer):
|
|
1124
1127
|
|
1125
1128
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1126
1129
|
|
1127
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1130
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1128
1131
|
outputs: List[BaseFeatureSpec] = []
|
1129
1132
|
if hasattr(self, "predict"):
|
1130
1133
|
# keep mypy happy
|
@@ -1132,7 +1135,7 @@ class NuSVR(BaseTransformer):
|
|
1132
1135
|
# For classifier, the type of predict is the same as the type of label
|
1133
1136
|
if self._sklearn_object._estimator_type == "classifier":
|
1134
1137
|
# label columns is the desired type for output
|
1135
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1138
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1136
1139
|
# rename the output columns
|
1137
1140
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1138
1141
|
self._model_signature_dict["predict"] = ModelSignature(
|