snowflake-ml-python 1.7.3__py3-none-any.whl → 1.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +19 -0
- snowflake/ml/_internal/env_utils.py +64 -21
- snowflake/ml/_internal/platform_capabilities.py +87 -0
- snowflake/ml/_internal/relax_version_strategy.py +16 -0
- snowflake/ml/_internal/telemetry.py +21 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +1 -1
- snowflake/ml/dataset/dataset.py +0 -1
- snowflake/ml/feature_store/feature_store.py +18 -0
- snowflake/ml/feature_store/feature_view.py +46 -1
- snowflake/ml/fileset/fileset.py +6 -0
- snowflake/ml/jobs/__init__.py +21 -0
- snowflake/ml/jobs/_utils/constants.py +57 -0
- snowflake/ml/jobs/_utils/payload_utils.py +438 -0
- snowflake/ml/jobs/_utils/spec_utils.py +296 -0
- snowflake/ml/jobs/_utils/types.py +39 -0
- snowflake/ml/jobs/decorators.py +71 -0
- snowflake/ml/jobs/job.py +113 -0
- snowflake/ml/jobs/manager.py +298 -0
- snowflake/ml/model/_client/ops/model_ops.py +11 -2
- snowflake/ml/model/_client/ops/service_ops.py +1 -11
- snowflake/ml/model/_client/sql/service.py +13 -6
- snowflake/ml/model/_packager/model_env/model_env.py +45 -28
- snowflake/ml/model/_packager/model_handlers/_utils.py +19 -6
- snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +17 -0
- snowflake/ml/model/_packager/model_handlers/keras.py +230 -0
- snowflake/ml/model/_packager/model_handlers/pytorch.py +1 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +28 -3
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +74 -21
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +27 -49
- snowflake/ml/model/_packager/model_handlers_migrator/tensorflow_migrator_2023_12_01.py +48 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +3 -0
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -1
- snowflake/ml/model/_packager/model_task/model_task_utils.py +5 -1
- snowflake/ml/model/_signatures/base_handler.py +1 -2
- snowflake/ml/model/_signatures/builtins_handler.py +2 -2
- snowflake/ml/model/_signatures/core.py +2 -2
- snowflake/ml/model/_signatures/numpy_handler.py +11 -12
- snowflake/ml/model/_signatures/pandas_handler.py +11 -9
- snowflake/ml/model/_signatures/pytorch_handler.py +3 -6
- snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
- snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
- snowflake/ml/model/model_signature.py +25 -4
- snowflake/ml/model/type_hints.py +15 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +14 -1
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
- snowflake/ml/modeling/cluster/birch.py +6 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
- snowflake/ml/modeling/cluster/dbscan.py +6 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
- snowflake/ml/modeling/cluster/k_means.py +6 -3
- snowflake/ml/modeling/cluster/mean_shift.py +6 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
- snowflake/ml/modeling/cluster/optics.py +6 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
- snowflake/ml/modeling/compose/column_transformer.py +6 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
- snowflake/ml/modeling/covariance/oas.py +6 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/pca.py +6 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
- snowflake/ml/modeling/impute/knn_imputer.py +6 -3
- snowflake/ml/modeling/impute/missing_indicator.py +6 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/lars.py +6 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/perceptron.py +6 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ridge.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
- snowflake/ml/modeling/manifold/isomap.py +6 -3
- snowflake/ml/modeling/manifold/mds.py +6 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
- snowflake/ml/modeling/manifold/tsne.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
- snowflake/ml/modeling/pipeline/pipeline.py +28 -3
- snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -5
- snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
- snowflake/ml/modeling/svm/linear_svc.py +6 -3
- snowflake/ml/modeling/svm/linear_svr.py +6 -3
- snowflake/ml/modeling/svm/nu_svc.py +6 -3
- snowflake/ml/modeling/svm/nu_svr.py +6 -3
- snowflake/ml/modeling/svm/svc.py +6 -3
- snowflake/ml/modeling/svm/svr.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +6 -3
- snowflake/ml/modeling/xgboost/xgb_regressor.py +6 -3
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +6 -3
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +6 -3
- snowflake/ml/registry/registry.py +34 -4
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/METADATA +81 -33
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/RECORD +208 -196
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/WHEEL +1 -1
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/top_level.txt +0 -0
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class SGDClassifier(BaseTransformer):
|
61
64
|
r"""Linear classifiers (SVM, logistic regression, etc
|
62
65
|
For more details on this class, see [sklearn.linear_model.SGDClassifier]
|
@@ -593,7 +596,7 @@ class SGDClassifier(BaseTransformer):
|
|
593
596
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
594
597
|
expected_dtype = "array"
|
595
598
|
else:
|
596
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
599
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
597
600
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
598
601
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
599
602
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1256,7 +1259,7 @@ class SGDClassifier(BaseTransformer):
|
|
1256
1259
|
|
1257
1260
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1258
1261
|
|
1259
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1262
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1260
1263
|
outputs: List[BaseFeatureSpec] = []
|
1261
1264
|
if hasattr(self, "predict"):
|
1262
1265
|
# keep mypy happy
|
@@ -1264,7 +1267,7 @@ class SGDClassifier(BaseTransformer):
|
|
1264
1267
|
# For classifier, the type of predict is the same as the type of label
|
1265
1268
|
if self._sklearn_object._estimator_type == "classifier":
|
1266
1269
|
# label columns is the desired type for output
|
1267
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1270
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1268
1271
|
# rename the output columns
|
1269
1272
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1270
1273
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class SGDOneClassSVM(BaseTransformer):
|
61
64
|
r"""Solves linear One-Class SVM using Stochastic Gradient Descent
|
62
65
|
For more details on this class, see [sklearn.linear_model.SGDOneClassSVM]
|
@@ -497,7 +500,7 @@ class SGDOneClassSVM(BaseTransformer):
|
|
497
500
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
498
501
|
expected_dtype = "array"
|
499
502
|
else:
|
500
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
503
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
501
504
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
502
505
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
503
506
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1158,7 +1161,7 @@ class SGDOneClassSVM(BaseTransformer):
|
|
1158
1161
|
|
1159
1162
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1160
1163
|
|
1161
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1164
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1162
1165
|
outputs: List[BaseFeatureSpec] = []
|
1163
1166
|
if hasattr(self, "predict"):
|
1164
1167
|
# keep mypy happy
|
@@ -1166,7 +1169,7 @@ class SGDOneClassSVM(BaseTransformer):
|
|
1166
1169
|
# For classifier, the type of predict is the same as the type of label
|
1167
1170
|
if self._sklearn_object._estimator_type == "classifier":
|
1168
1171
|
# label columns is the desired type for output
|
1169
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1172
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1170
1173
|
# rename the output columns
|
1171
1174
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1172
1175
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class SGDRegressor(BaseTransformer):
|
61
64
|
r"""Linear model fitted by minimizing a regularized empirical loss with SGD
|
62
65
|
For more details on this class, see [sklearn.linear_model.SGDRegressor]
|
@@ -568,7 +571,7 @@ class SGDRegressor(BaseTransformer):
|
|
568
571
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
569
572
|
expected_dtype = "array"
|
570
573
|
else:
|
571
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
574
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
572
575
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
573
576
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
574
577
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1225,7 +1228,7 @@ class SGDRegressor(BaseTransformer):
|
|
1225
1228
|
|
1226
1229
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1227
1230
|
|
1228
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1231
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1229
1232
|
outputs: List[BaseFeatureSpec] = []
|
1230
1233
|
if hasattr(self, "predict"):
|
1231
1234
|
# keep mypy happy
|
@@ -1233,7 +1236,7 @@ class SGDRegressor(BaseTransformer):
|
|
1233
1236
|
# For classifier, the type of predict is the same as the type of label
|
1234
1237
|
if self._sklearn_object._estimator_type == "classifier":
|
1235
1238
|
# label columns is the desired type for output
|
1236
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1239
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1237
1240
|
# rename the output columns
|
1238
1241
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1239
1242
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class TheilSenRegressor(BaseTransformer):
|
61
64
|
r"""Theil-Sen Estimator: robust multivariate regression model
|
62
65
|
For more details on this class, see [sklearn.linear_model.TheilSenRegressor]
|
@@ -461,7 +464,7 @@ class TheilSenRegressor(BaseTransformer):
|
|
461
464
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
462
465
|
expected_dtype = "array"
|
463
466
|
else:
|
464
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
467
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
465
468
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
466
469
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
467
470
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1118,7 +1121,7 @@ class TheilSenRegressor(BaseTransformer):
|
|
1118
1121
|
|
1119
1122
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1120
1123
|
|
1121
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1124
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1122
1125
|
outputs: List[BaseFeatureSpec] = []
|
1123
1126
|
if hasattr(self, "predict"):
|
1124
1127
|
# keep mypy happy
|
@@ -1126,7 +1129,7 @@ class TheilSenRegressor(BaseTransformer):
|
|
1126
1129
|
# For classifier, the type of predict is the same as the type of label
|
1127
1130
|
if self._sklearn_object._estimator_type == "classifier":
|
1128
1131
|
# label columns is the desired type for output
|
1129
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1132
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1130
1133
|
# rename the output columns
|
1131
1134
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1132
1135
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class TweedieRegressor(BaseTransformer):
|
61
64
|
r"""Generalized Linear Model with a Tweedie distribution
|
62
65
|
For more details on this class, see [sklearn.linear_model.TweedieRegressor]
|
@@ -487,7 +490,7 @@ class TweedieRegressor(BaseTransformer):
|
|
487
490
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
488
491
|
expected_dtype = "array"
|
489
492
|
else:
|
490
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
493
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
491
494
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
492
495
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
493
496
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1144,7 +1147,7 @@ class TweedieRegressor(BaseTransformer):
|
|
1144
1147
|
|
1145
1148
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1146
1149
|
|
1147
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1150
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1148
1151
|
outputs: List[BaseFeatureSpec] = []
|
1149
1152
|
if hasattr(self, "predict"):
|
1150
1153
|
# keep mypy happy
|
@@ -1152,7 +1155,7 @@ class TweedieRegressor(BaseTransformer):
|
|
1152
1155
|
# For classifier, the type of predict is the same as the type of label
|
1153
1156
|
if self._sklearn_object._estimator_type == "classifier":
|
1154
1157
|
# label columns is the desired type for output
|
1155
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1158
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1156
1159
|
# rename the output columns
|
1157
1160
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1158
1161
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.manifold".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class Isomap(BaseTransformer):
|
61
64
|
r"""Isomap Embedding
|
62
65
|
For more details on this class, see [sklearn.manifold.Isomap]
|
@@ -485,7 +488,7 @@ class Isomap(BaseTransformer):
|
|
485
488
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
486
489
|
expected_dtype = "array"
|
487
490
|
else:
|
488
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
491
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
489
492
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
490
493
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
491
494
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1142,7 +1145,7 @@ class Isomap(BaseTransformer):
|
|
1142
1145
|
|
1143
1146
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1144
1147
|
|
1145
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1148
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1146
1149
|
outputs: List[BaseFeatureSpec] = []
|
1147
1150
|
if hasattr(self, "predict"):
|
1148
1151
|
# keep mypy happy
|
@@ -1150,7 +1153,7 @@ class Isomap(BaseTransformer):
|
|
1150
1153
|
# For classifier, the type of predict is the same as the type of label
|
1151
1154
|
if self._sklearn_object._estimator_type == "classifier":
|
1152
1155
|
# label columns is the desired type for output
|
1153
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1156
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1154
1157
|
# rename the output columns
|
1155
1158
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1156
1159
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.manifold".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class MDS(BaseTransformer):
|
61
64
|
r"""Multidimensional scaling
|
62
65
|
For more details on this class, see [sklearn.manifold.MDS]
|
@@ -466,7 +469,7 @@ class MDS(BaseTransformer):
|
|
466
469
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
467
470
|
expected_dtype = "array"
|
468
471
|
else:
|
469
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
472
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
470
473
|
# We can only infer the output types from the input types if the following two statements are true:
|
471
474
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
472
475
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1123,7 +1126,7 @@ class MDS(BaseTransformer):
|
|
1123
1126
|
|
1124
1127
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1125
1128
|
|
1126
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1129
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1127
1130
|
outputs: List[BaseFeatureSpec] = []
|
1128
1131
|
if hasattr(self, "predict"):
|
1129
1132
|
# keep mypy happy
|
@@ -1131,7 +1134,7 @@ class MDS(BaseTransformer):
|
|
1131
1134
|
# For classifier, the type of predict is the same as the type of label
|
1132
1135
|
if self._sklearn_object._estimator_type == "classifier":
|
1133
1136
|
# label columns is the desired type for output
|
1134
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1137
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1135
1138
|
# rename the output columns
|
1136
1139
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1137
1140
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.manifold".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class SpectralEmbedding(BaseTransformer):
|
61
64
|
r"""Spectral embedding for non-linear dimensionality reduction
|
62
65
|
For more details on this class, see [sklearn.manifold.SpectralEmbedding]
|
@@ -468,7 +471,7 @@ class SpectralEmbedding(BaseTransformer):
|
|
468
471
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
469
472
|
expected_dtype = "array"
|
470
473
|
else:
|
471
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
474
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
472
475
|
# We can only infer the output types from the input types if the following two statements are true:
|
473
476
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
474
477
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1125,7 +1128,7 @@ class SpectralEmbedding(BaseTransformer):
|
|
1125
1128
|
|
1126
1129
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1127
1130
|
|
1128
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1131
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1129
1132
|
outputs: List[BaseFeatureSpec] = []
|
1130
1133
|
if hasattr(self, "predict"):
|
1131
1134
|
# keep mypy happy
|
@@ -1133,7 +1136,7 @@ class SpectralEmbedding(BaseTransformer):
|
|
1133
1136
|
# For classifier, the type of predict is the same as the type of label
|
1134
1137
|
if self._sklearn_object._estimator_type == "classifier":
|
1135
1138
|
# label columns is the desired type for output
|
1136
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1139
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1137
1140
|
# rename the output columns
|
1138
1141
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1139
1142
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.manifold".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class TSNE(BaseTransformer):
|
61
64
|
r"""T-distributed Stochastic Neighbor Embedding
|
62
65
|
For more details on this class, see [sklearn.manifold.TSNE]
|
@@ -533,7 +536,7 @@ class TSNE(BaseTransformer):
|
|
533
536
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
534
537
|
expected_dtype = "array"
|
535
538
|
else:
|
536
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
539
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
537
540
|
# We can only infer the output types from the input types if the following two statements are true:
|
538
541
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
539
542
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1190,7 +1193,7 @@ class TSNE(BaseTransformer):
|
|
1190
1193
|
|
1191
1194
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1192
1195
|
|
1193
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1196
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1194
1197
|
outputs: List[BaseFeatureSpec] = []
|
1195
1198
|
if hasattr(self, "predict"):
|
1196
1199
|
# keep mypy happy
|
@@ -1198,7 +1201,7 @@ class TSNE(BaseTransformer):
|
|
1198
1201
|
# For classifier, the type of predict is the same as the type of label
|
1199
1202
|
if self._sklearn_object._estimator_type == "classifier":
|
1200
1203
|
# label columns is the desired type for output
|
1201
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1204
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1202
1205
|
# rename the output columns
|
1203
1206
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1204
1207
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.mixture".replace("sklear
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class BayesianGaussianMixture(BaseTransformer):
|
61
64
|
r"""Variational Bayesian estimation of a Gaussian mixture
|
62
65
|
For more details on this class, see [sklearn.mixture.BayesianGaussianMixture]
|
@@ -532,7 +535,7 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
532
535
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
533
536
|
expected_dtype = "array"
|
534
537
|
else:
|
535
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
538
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
536
539
|
# We can only infer the output types from the input types if the following two statements are true:
|
537
540
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
538
541
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1197,7 +1200,7 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
1197
1200
|
|
1198
1201
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1199
1202
|
|
1200
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1203
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1201
1204
|
outputs: List[BaseFeatureSpec] = []
|
1202
1205
|
if hasattr(self, "predict"):
|
1203
1206
|
# keep mypy happy
|
@@ -1205,7 +1208,7 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
1205
1208
|
# For classifier, the type of predict is the same as the type of label
|
1206
1209
|
if self._sklearn_object._estimator_type == "classifier":
|
1207
1210
|
# label columns is the desired type for output
|
1208
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1211
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1209
1212
|
# rename the output columns
|
1210
1213
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1211
1214
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.mixture".replace("sklear
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class GaussianMixture(BaseTransformer):
|
61
64
|
r"""Gaussian Mixture
|
62
65
|
For more details on this class, see [sklearn.mixture.GaussianMixture]
|
@@ -505,7 +508,7 @@ class GaussianMixture(BaseTransformer):
|
|
505
508
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
506
509
|
expected_dtype = "array"
|
507
510
|
else:
|
508
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
511
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
509
512
|
# We can only infer the output types from the input types if the following two statements are true:
|
510
513
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
511
514
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1170,7 +1173,7 @@ class GaussianMixture(BaseTransformer):
|
|
1170
1173
|
|
1171
1174
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1172
1175
|
|
1173
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1176
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1174
1177
|
outputs: List[BaseFeatureSpec] = []
|
1175
1178
|
if hasattr(self, "predict"):
|
1176
1179
|
# keep mypy happy
|
@@ -1178,7 +1181,7 @@ class GaussianMixture(BaseTransformer):
|
|
1178
1181
|
# For classifier, the type of predict is the same as the type of label
|
1179
1182
|
if self._sklearn_object._estimator_type == "classifier":
|
1180
1183
|
# label columns is the desired type for output
|
1181
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1184
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1182
1185
|
# rename the output columns
|
1183
1186
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1184
1187
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -21,6 +21,7 @@ from snowflake.ml.model.model_signature import (
|
|
21
21
|
ModelSignature,
|
22
22
|
_infer_signature,
|
23
23
|
_rename_signature_with_snowflake_identifiers,
|
24
|
+
_truncate_data,
|
24
25
|
)
|
25
26
|
from snowflake.ml.modeling._internal.estimator_utils import (
|
26
27
|
gather_dependencies,
|
@@ -47,6 +48,8 @@ _PROJECT = "ModelDevelopment"
|
|
47
48
|
_SUBPROJECT = "ModelSelection"
|
48
49
|
DEFAULT_UDTF_NJOBS = 3
|
49
50
|
|
51
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
52
|
+
|
50
53
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
51
54
|
|
52
55
|
|
@@ -810,7 +813,13 @@ class GridSearchCV(BaseTransformer):
|
|
810
813
|
|
811
814
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
812
815
|
|
813
|
-
inputs = list(
|
816
|
+
inputs = list(
|
817
|
+
_infer_signature(
|
818
|
+
_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS),
|
819
|
+
"input",
|
820
|
+
use_snowflake_identifiers=True,
|
821
|
+
)
|
822
|
+
)
|
814
823
|
outputs: List[BaseFeatureSpec] = []
|
815
824
|
if hasattr(self, "predict"):
|
816
825
|
# keep mypy happy
|
@@ -818,7 +827,13 @@ class GridSearchCV(BaseTransformer):
|
|
818
827
|
# For classifier, the type of predict is the same as the type of label
|
819
828
|
if self._sklearn_object._estimator_type == "classifier":
|
820
829
|
# label columns is the desired type for output
|
821
|
-
outputs = list(
|
830
|
+
outputs = list(
|
831
|
+
_infer_signature(
|
832
|
+
_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS),
|
833
|
+
"output",
|
834
|
+
use_snowflake_identifiers=True,
|
835
|
+
)
|
836
|
+
)
|
822
837
|
# rename the output columns
|
823
838
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
824
839
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -18,6 +18,7 @@ from snowflake.ml.model.model_signature import (
|
|
18
18
|
ModelSignature,
|
19
19
|
_infer_signature,
|
20
20
|
_rename_signature_with_snowflake_identifiers,
|
21
|
+
_truncate_data,
|
21
22
|
)
|
22
23
|
from snowflake.ml.modeling._internal.estimator_utils import (
|
23
24
|
gather_dependencies,
|
@@ -44,6 +45,8 @@ _PROJECT = "ModelDevelopment"
|
|
44
45
|
_SUBPROJECT = "ModelSelection"
|
45
46
|
DEFAULT_UDTF_NJOBS = 3
|
46
47
|
|
48
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
49
|
+
|
47
50
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
48
51
|
|
49
52
|
|
@@ -825,7 +828,13 @@ class RandomizedSearchCV(BaseTransformer):
|
|
825
828
|
|
826
829
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
827
830
|
|
828
|
-
inputs = list(
|
831
|
+
inputs = list(
|
832
|
+
_infer_signature(
|
833
|
+
_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS),
|
834
|
+
"input",
|
835
|
+
use_snowflake_identifiers=True,
|
836
|
+
)
|
837
|
+
)
|
829
838
|
outputs: List[BaseFeatureSpec] = []
|
830
839
|
if hasattr(self, "predict"):
|
831
840
|
# keep mypy happy
|
@@ -833,7 +842,13 @@ class RandomizedSearchCV(BaseTransformer):
|
|
833
842
|
# For classifier, the type of predict is the same as the type of label
|
834
843
|
if self._sklearn_object._estimator_type == "classifier":
|
835
844
|
# label columns is the desired type for output
|
836
|
-
outputs = list(
|
845
|
+
outputs = list(
|
846
|
+
_infer_signature(
|
847
|
+
_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS),
|
848
|
+
"output",
|
849
|
+
use_snowflake_identifiers=True,
|
850
|
+
)
|
851
|
+
)
|
837
852
|
# rename the output columns
|
838
853
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
839
854
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.multiclass".replace("skl
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class OneVsOneClassifier(BaseTransformer):
|
61
64
|
r"""One-vs-one multiclass strategy
|
62
65
|
For more details on this class, see [sklearn.multiclass.OneVsOneClassifier]
|
@@ -415,7 +418,7 @@ class OneVsOneClassifier(BaseTransformer):
|
|
415
418
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
416
419
|
expected_dtype = "array"
|
417
420
|
else:
|
418
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
421
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
419
422
|
# We can only infer the output types from the input types if the following two statements are true:
|
420
423
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
421
424
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1074,7 +1077,7 @@ class OneVsOneClassifier(BaseTransformer):
|
|
1074
1077
|
|
1075
1078
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1076
1079
|
|
1077
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1080
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1078
1081
|
outputs: List[BaseFeatureSpec] = []
|
1079
1082
|
if hasattr(self, "predict"):
|
1080
1083
|
# keep mypy happy
|
@@ -1082,7 +1085,7 @@ class OneVsOneClassifier(BaseTransformer):
|
|
1082
1085
|
# For classifier, the type of predict is the same as the type of label
|
1083
1086
|
if self._sklearn_object._estimator_type == "classifier":
|
1084
1087
|
# label columns is the desired type for output
|
1085
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1088
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1086
1089
|
# rename the output columns
|
1087
1090
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1088
1091
|
self._model_signature_dict["predict"] = ModelSignature(
|