snowflake-ml-python 1.7.3__py3-none-any.whl → 1.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +19 -0
- snowflake/ml/_internal/env_utils.py +64 -21
- snowflake/ml/_internal/platform_capabilities.py +87 -0
- snowflake/ml/_internal/relax_version_strategy.py +16 -0
- snowflake/ml/_internal/telemetry.py +21 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +1 -1
- snowflake/ml/dataset/dataset.py +0 -1
- snowflake/ml/feature_store/feature_store.py +18 -0
- snowflake/ml/feature_store/feature_view.py +46 -1
- snowflake/ml/fileset/fileset.py +6 -0
- snowflake/ml/jobs/__init__.py +21 -0
- snowflake/ml/jobs/_utils/constants.py +57 -0
- snowflake/ml/jobs/_utils/payload_utils.py +438 -0
- snowflake/ml/jobs/_utils/spec_utils.py +296 -0
- snowflake/ml/jobs/_utils/types.py +39 -0
- snowflake/ml/jobs/decorators.py +71 -0
- snowflake/ml/jobs/job.py +113 -0
- snowflake/ml/jobs/manager.py +298 -0
- snowflake/ml/model/_client/ops/model_ops.py +11 -2
- snowflake/ml/model/_client/ops/service_ops.py +1 -11
- snowflake/ml/model/_client/sql/service.py +13 -6
- snowflake/ml/model/_packager/model_env/model_env.py +45 -28
- snowflake/ml/model/_packager/model_handlers/_utils.py +19 -6
- snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +17 -0
- snowflake/ml/model/_packager/model_handlers/keras.py +230 -0
- snowflake/ml/model/_packager/model_handlers/pytorch.py +1 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +28 -3
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +74 -21
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +27 -49
- snowflake/ml/model/_packager/model_handlers_migrator/tensorflow_migrator_2023_12_01.py +48 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +3 -0
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -1
- snowflake/ml/model/_packager/model_task/model_task_utils.py +5 -1
- snowflake/ml/model/_signatures/base_handler.py +1 -2
- snowflake/ml/model/_signatures/builtins_handler.py +2 -2
- snowflake/ml/model/_signatures/core.py +2 -2
- snowflake/ml/model/_signatures/numpy_handler.py +11 -12
- snowflake/ml/model/_signatures/pandas_handler.py +11 -9
- snowflake/ml/model/_signatures/pytorch_handler.py +3 -6
- snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
- snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
- snowflake/ml/model/model_signature.py +25 -4
- snowflake/ml/model/type_hints.py +15 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +14 -1
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
- snowflake/ml/modeling/cluster/birch.py +6 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
- snowflake/ml/modeling/cluster/dbscan.py +6 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
- snowflake/ml/modeling/cluster/k_means.py +6 -3
- snowflake/ml/modeling/cluster/mean_shift.py +6 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
- snowflake/ml/modeling/cluster/optics.py +6 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
- snowflake/ml/modeling/compose/column_transformer.py +6 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
- snowflake/ml/modeling/covariance/oas.py +6 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/pca.py +6 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
- snowflake/ml/modeling/impute/knn_imputer.py +6 -3
- snowflake/ml/modeling/impute/missing_indicator.py +6 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/lars.py +6 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/perceptron.py +6 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ridge.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
- snowflake/ml/modeling/manifold/isomap.py +6 -3
- snowflake/ml/modeling/manifold/mds.py +6 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
- snowflake/ml/modeling/manifold/tsne.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
- snowflake/ml/modeling/pipeline/pipeline.py +28 -3
- snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -5
- snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
- snowflake/ml/modeling/svm/linear_svc.py +6 -3
- snowflake/ml/modeling/svm/linear_svr.py +6 -3
- snowflake/ml/modeling/svm/nu_svc.py +6 -3
- snowflake/ml/modeling/svm/nu_svr.py +6 -3
- snowflake/ml/modeling/svm/svc.py +6 -3
- snowflake/ml/modeling/svm/svr.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +6 -3
- snowflake/ml/modeling/xgboost/xgb_regressor.py +6 -3
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +6 -3
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +6 -3
- snowflake/ml/registry/registry.py +34 -4
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/METADATA +81 -33
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/RECORD +208 -196
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/WHEEL +1 -1
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/top_level.txt +0 -0
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class ElasticNet(BaseTransformer):
|
61
64
|
r"""Linear regression with combined L1 and L2 priors as regularizer
|
62
65
|
For more details on this class, see [sklearn.linear_model.ElasticNet]
|
@@ -476,7 +479,7 @@ class ElasticNet(BaseTransformer):
|
|
476
479
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
477
480
|
expected_dtype = "array"
|
478
481
|
else:
|
479
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
482
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
480
483
|
# We can only infer the output types from the input types if the following two statements are true:
|
481
484
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
482
485
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1133,7 +1136,7 @@ class ElasticNet(BaseTransformer):
|
|
1133
1136
|
|
1134
1137
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1135
1138
|
|
1136
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1139
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1137
1140
|
outputs: List[BaseFeatureSpec] = []
|
1138
1141
|
if hasattr(self, "predict"):
|
1139
1142
|
# keep mypy happy
|
@@ -1141,7 +1144,7 @@ class ElasticNet(BaseTransformer):
|
|
1141
1144
|
# For classifier, the type of predict is the same as the type of label
|
1142
1145
|
if self._sklearn_object._estimator_type == "classifier":
|
1143
1146
|
# label columns is the desired type for output
|
1144
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1147
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1145
1148
|
# rename the output columns
|
1146
1149
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1147
1150
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class ElasticNetCV(BaseTransformer):
|
61
64
|
r"""Elastic Net model with iterative fitting along a regularization path
|
62
65
|
For more details on this class, see [sklearn.linear_model.ElasticNetCV]
|
@@ -509,7 +512,7 @@ class ElasticNetCV(BaseTransformer):
|
|
509
512
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
510
513
|
expected_dtype = "array"
|
511
514
|
else:
|
512
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
515
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
513
516
|
# We can only infer the output types from the input types if the following two statements are true:
|
514
517
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
515
518
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1166,7 +1169,7 @@ class ElasticNetCV(BaseTransformer):
|
|
1166
1169
|
|
1167
1170
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1168
1171
|
|
1169
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1172
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1170
1173
|
outputs: List[BaseFeatureSpec] = []
|
1171
1174
|
if hasattr(self, "predict"):
|
1172
1175
|
# keep mypy happy
|
@@ -1174,7 +1177,7 @@ class ElasticNetCV(BaseTransformer):
|
|
1174
1177
|
# For classifier, the type of predict is the same as the type of label
|
1175
1178
|
if self._sklearn_object._estimator_type == "classifier":
|
1176
1179
|
# label columns is the desired type for output
|
1177
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1180
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1178
1181
|
# rename the output columns
|
1179
1182
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1180
1183
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class GammaRegressor(BaseTransformer):
|
61
64
|
r"""Generalized Linear Model with a Gamma distribution
|
62
65
|
For more details on this class, see [sklearn.linear_model.GammaRegressor]
|
@@ -454,7 +457,7 @@ class GammaRegressor(BaseTransformer):
|
|
454
457
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
455
458
|
expected_dtype = "array"
|
456
459
|
else:
|
457
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
460
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
458
461
|
# We can only infer the output types from the input types if the following two statements are true:
|
459
462
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
460
463
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1111,7 +1114,7 @@ class GammaRegressor(BaseTransformer):
|
|
1111
1114
|
|
1112
1115
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1113
1116
|
|
1114
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1117
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1115
1118
|
outputs: List[BaseFeatureSpec] = []
|
1116
1119
|
if hasattr(self, "predict"):
|
1117
1120
|
# keep mypy happy
|
@@ -1119,7 +1122,7 @@ class GammaRegressor(BaseTransformer):
|
|
1119
1122
|
# For classifier, the type of predict is the same as the type of label
|
1120
1123
|
if self._sklearn_object._estimator_type == "classifier":
|
1121
1124
|
# label columns is the desired type for output
|
1122
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1125
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1123
1126
|
# rename the output columns
|
1124
1127
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1125
1128
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class HuberRegressor(BaseTransformer):
|
61
64
|
r"""L2-regularized linear regression model that is robust to outliers
|
62
65
|
For more details on this class, see [sklearn.linear_model.HuberRegressor]
|
@@ -437,7 +440,7 @@ class HuberRegressor(BaseTransformer):
|
|
437
440
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
438
441
|
expected_dtype = "array"
|
439
442
|
else:
|
440
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
443
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
441
444
|
# We can only infer the output types from the input types if the following two statements are true:
|
442
445
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
443
446
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1094,7 +1097,7 @@ class HuberRegressor(BaseTransformer):
|
|
1094
1097
|
|
1095
1098
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1096
1099
|
|
1097
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1100
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1098
1101
|
outputs: List[BaseFeatureSpec] = []
|
1099
1102
|
if hasattr(self, "predict"):
|
1100
1103
|
# keep mypy happy
|
@@ -1102,7 +1105,7 @@ class HuberRegressor(BaseTransformer):
|
|
1102
1105
|
# For classifier, the type of predict is the same as the type of label
|
1103
1106
|
if self._sklearn_object._estimator_type == "classifier":
|
1104
1107
|
# label columns is the desired type for output
|
1105
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1108
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1106
1109
|
# rename the output columns
|
1107
1110
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1108
1111
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class Lars(BaseTransformer):
|
61
64
|
r"""Least Angle Regression model a
|
62
65
|
For more details on this class, see [sklearn.linear_model.Lars]
|
@@ -456,7 +459,7 @@ class Lars(BaseTransformer):
|
|
456
459
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
457
460
|
expected_dtype = "array"
|
458
461
|
else:
|
459
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
462
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
460
463
|
# We can only infer the output types from the input types if the following two statements are true:
|
461
464
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
462
465
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1113,7 +1116,7 @@ class Lars(BaseTransformer):
|
|
1113
1116
|
|
1114
1117
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1115
1118
|
|
1116
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1119
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1117
1120
|
outputs: List[BaseFeatureSpec] = []
|
1118
1121
|
if hasattr(self, "predict"):
|
1119
1122
|
# keep mypy happy
|
@@ -1121,7 +1124,7 @@ class Lars(BaseTransformer):
|
|
1121
1124
|
# For classifier, the type of predict is the same as the type of label
|
1122
1125
|
if self._sklearn_object._estimator_type == "classifier":
|
1123
1126
|
# label columns is the desired type for output
|
1124
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1127
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1125
1128
|
# rename the output columns
|
1126
1129
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1127
1130
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class LarsCV(BaseTransformer):
|
61
64
|
r"""Cross-validated Least Angle Regression model
|
62
65
|
For more details on this class, see [sklearn.linear_model.LarsCV]
|
@@ -464,7 +467,7 @@ class LarsCV(BaseTransformer):
|
|
464
467
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
465
468
|
expected_dtype = "array"
|
466
469
|
else:
|
467
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
470
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
468
471
|
# We can only infer the output types from the input types if the following two statements are true:
|
469
472
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
470
473
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1121,7 +1124,7 @@ class LarsCV(BaseTransformer):
|
|
1121
1124
|
|
1122
1125
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1123
1126
|
|
1124
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1127
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1125
1128
|
outputs: List[BaseFeatureSpec] = []
|
1126
1129
|
if hasattr(self, "predict"):
|
1127
1130
|
# keep mypy happy
|
@@ -1129,7 +1132,7 @@ class LarsCV(BaseTransformer):
|
|
1129
1132
|
# For classifier, the type of predict is the same as the type of label
|
1130
1133
|
if self._sklearn_object._estimator_type == "classifier":
|
1131
1134
|
# label columns is the desired type for output
|
1132
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1135
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1133
1136
|
# rename the output columns
|
1134
1137
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1135
1138
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class Lasso(BaseTransformer):
|
61
64
|
r"""Linear Model trained with L1 prior as regularizer (aka the Lasso)
|
62
65
|
For more details on this class, see [sklearn.linear_model.Lasso]
|
@@ -467,7 +470,7 @@ class Lasso(BaseTransformer):
|
|
467
470
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
468
471
|
expected_dtype = "array"
|
469
472
|
else:
|
470
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
473
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
471
474
|
# We can only infer the output types from the input types if the following two statements are true:
|
472
475
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
473
476
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1124,7 +1127,7 @@ class Lasso(BaseTransformer):
|
|
1124
1127
|
|
1125
1128
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1126
1129
|
|
1127
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1130
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1128
1131
|
outputs: List[BaseFeatureSpec] = []
|
1129
1132
|
if hasattr(self, "predict"):
|
1130
1133
|
# keep mypy happy
|
@@ -1132,7 +1135,7 @@ class Lasso(BaseTransformer):
|
|
1132
1135
|
# For classifier, the type of predict is the same as the type of label
|
1133
1136
|
if self._sklearn_object._estimator_type == "classifier":
|
1134
1137
|
# label columns is the desired type for output
|
1135
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1138
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1136
1139
|
# rename the output columns
|
1137
1140
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1138
1141
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class LassoCV(BaseTransformer):
|
61
64
|
r"""Lasso linear model with iterative fitting along a regularization path
|
62
65
|
For more details on this class, see [sklearn.linear_model.LassoCV]
|
@@ -495,7 +498,7 @@ class LassoCV(BaseTransformer):
|
|
495
498
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
496
499
|
expected_dtype = "array"
|
497
500
|
else:
|
498
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
501
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
499
502
|
# We can only infer the output types from the input types if the following two statements are true:
|
500
503
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
501
504
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1152,7 +1155,7 @@ class LassoCV(BaseTransformer):
|
|
1152
1155
|
|
1153
1156
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1154
1157
|
|
1155
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1158
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1156
1159
|
outputs: List[BaseFeatureSpec] = []
|
1157
1160
|
if hasattr(self, "predict"):
|
1158
1161
|
# keep mypy happy
|
@@ -1160,7 +1163,7 @@ class LassoCV(BaseTransformer):
|
|
1160
1163
|
# For classifier, the type of predict is the same as the type of label
|
1161
1164
|
if self._sklearn_object._estimator_type == "classifier":
|
1162
1165
|
# label columns is the desired type for output
|
1163
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1166
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1164
1167
|
# rename the output columns
|
1165
1168
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1166
1169
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class LassoLars(BaseTransformer):
|
61
64
|
r"""Lasso model fit with Least Angle Regression a.k.a. Lars
|
62
65
|
For more details on this class, see [sklearn.linear_model.LassoLars]
|
@@ -477,7 +480,7 @@ class LassoLars(BaseTransformer):
|
|
477
480
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
478
481
|
expected_dtype = "array"
|
479
482
|
else:
|
480
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
483
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
481
484
|
# We can only infer the output types from the input types if the following two statements are true:
|
482
485
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
483
486
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1134,7 +1137,7 @@ class LassoLars(BaseTransformer):
|
|
1134
1137
|
|
1135
1138
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1136
1139
|
|
1137
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1140
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1138
1141
|
outputs: List[BaseFeatureSpec] = []
|
1139
1142
|
if hasattr(self, "predict"):
|
1140
1143
|
# keep mypy happy
|
@@ -1142,7 +1145,7 @@ class LassoLars(BaseTransformer):
|
|
1142
1145
|
# For classifier, the type of predict is the same as the type of label
|
1143
1146
|
if self._sklearn_object._estimator_type == "classifier":
|
1144
1147
|
# label columns is the desired type for output
|
1145
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1148
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1146
1149
|
# rename the output columns
|
1147
1150
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1148
1151
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class LassoLarsCV(BaseTransformer):
|
61
64
|
r"""Cross-validated Lasso, using the LARS algorithm
|
62
65
|
For more details on this class, see [sklearn.linear_model.LassoLarsCV]
|
@@ -478,7 +481,7 @@ class LassoLarsCV(BaseTransformer):
|
|
478
481
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
479
482
|
expected_dtype = "array"
|
480
483
|
else:
|
481
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
484
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
482
485
|
# We can only infer the output types from the input types if the following two statements are true:
|
483
486
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
484
487
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1135,7 +1138,7 @@ class LassoLarsCV(BaseTransformer):
|
|
1135
1138
|
|
1136
1139
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1137
1140
|
|
1138
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1141
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1139
1142
|
outputs: List[BaseFeatureSpec] = []
|
1140
1143
|
if hasattr(self, "predict"):
|
1141
1144
|
# keep mypy happy
|
@@ -1143,7 +1146,7 @@ class LassoLarsCV(BaseTransformer):
|
|
1143
1146
|
# For classifier, the type of predict is the same as the type of label
|
1144
1147
|
if self._sklearn_object._estimator_type == "classifier":
|
1145
1148
|
# label columns is the desired type for output
|
1146
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1149
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1147
1150
|
# rename the output columns
|
1148
1151
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1149
1152
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class LassoLarsIC(BaseTransformer):
|
61
64
|
r"""Lasso model fit with Lars using BIC or AIC for model selection
|
62
65
|
For more details on this class, see [sklearn.linear_model.LassoLarsIC]
|
@@ -461,7 +464,7 @@ class LassoLarsIC(BaseTransformer):
|
|
461
464
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
462
465
|
expected_dtype = "array"
|
463
466
|
else:
|
464
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
467
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
465
468
|
# We can only infer the output types from the input types if the following two statements are true:
|
466
469
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
467
470
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1118,7 +1121,7 @@ class LassoLarsIC(BaseTransformer):
|
|
1118
1121
|
|
1119
1122
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1120
1123
|
|
1121
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1124
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1122
1125
|
outputs: List[BaseFeatureSpec] = []
|
1123
1126
|
if hasattr(self, "predict"):
|
1124
1127
|
# keep mypy happy
|
@@ -1126,7 +1129,7 @@ class LassoLarsIC(BaseTransformer):
|
|
1126
1129
|
# For classifier, the type of predict is the same as the type of label
|
1127
1130
|
if self._sklearn_object._estimator_type == "classifier":
|
1128
1131
|
# label columns is the desired type for output
|
1129
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1132
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1130
1133
|
# rename the output columns
|
1131
1134
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1132
1135
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class LinearRegression(BaseTransformer):
|
61
64
|
r"""Ordinary least squares Linear Regression
|
62
65
|
For more details on this class, see [sklearn.linear_model.LinearRegression]
|
@@ -424,7 +427,7 @@ class LinearRegression(BaseTransformer):
|
|
424
427
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
425
428
|
expected_dtype = "array"
|
426
429
|
else:
|
427
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
430
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
428
431
|
# We can only infer the output types from the input types if the following two statements are true:
|
429
432
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
430
433
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1081,7 +1084,7 @@ class LinearRegression(BaseTransformer):
|
|
1081
1084
|
|
1082
1085
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1083
1086
|
|
1084
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1087
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1085
1088
|
outputs: List[BaseFeatureSpec] = []
|
1086
1089
|
if hasattr(self, "predict"):
|
1087
1090
|
# keep mypy happy
|
@@ -1089,7 +1092,7 @@ class LinearRegression(BaseTransformer):
|
|
1089
1092
|
# For classifier, the type of predict is the same as the type of label
|
1090
1093
|
if self._sklearn_object._estimator_type == "classifier":
|
1091
1094
|
# label columns is the desired type for output
|
1092
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1095
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1093
1096
|
# rename the output columns
|
1094
1097
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1095
1098
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class LogisticRegression(BaseTransformer):
|
61
64
|
r"""Logistic Regression (aka logit, MaxEnt) classifier
|
62
65
|
For more details on this class, see [sklearn.linear_model.LogisticRegression]
|
@@ -544,7 +547,7 @@ class LogisticRegression(BaseTransformer):
|
|
544
547
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
545
548
|
expected_dtype = "array"
|
546
549
|
else:
|
547
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
550
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
548
551
|
# We can only infer the output types from the input types if the following two statements are true:
|
549
552
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
550
553
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1207,7 +1210,7 @@ class LogisticRegression(BaseTransformer):
|
|
1207
1210
|
|
1208
1211
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1209
1212
|
|
1210
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1213
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1211
1214
|
outputs: List[BaseFeatureSpec] = []
|
1212
1215
|
if hasattr(self, "predict"):
|
1213
1216
|
# keep mypy happy
|
@@ -1215,7 +1218,7 @@ class LogisticRegression(BaseTransformer):
|
|
1215
1218
|
# For classifier, the type of predict is the same as the type of label
|
1216
1219
|
if self._sklearn_object._estimator_type == "classifier":
|
1217
1220
|
# label columns is the desired type for output
|
1218
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1221
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1219
1222
|
# rename the output columns
|
1220
1223
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1221
1224
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class LogisticRegressionCV(BaseTransformer):
|
61
64
|
r"""Logistic Regression CV (aka logit, MaxEnt) classifier
|
62
65
|
For more details on this class, see [sklearn.linear_model.LogisticRegressionCV]
|
@@ -565,7 +568,7 @@ class LogisticRegressionCV(BaseTransformer):
|
|
565
568
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
566
569
|
expected_dtype = "array"
|
567
570
|
else:
|
568
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
571
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
569
572
|
# We can only infer the output types from the input types if the following two statements are true:
|
570
573
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
571
574
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1228,7 +1231,7 @@ class LogisticRegressionCV(BaseTransformer):
|
|
1228
1231
|
|
1229
1232
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1230
1233
|
|
1231
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1234
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1232
1235
|
outputs: List[BaseFeatureSpec] = []
|
1233
1236
|
if hasattr(self, "predict"):
|
1234
1237
|
# keep mypy happy
|
@@ -1236,7 +1239,7 @@ class LogisticRegressionCV(BaseTransformer):
|
|
1236
1239
|
# For classifier, the type of predict is the same as the type of label
|
1237
1240
|
if self._sklearn_object._estimator_type == "classifier":
|
1238
1241
|
# label columns is the desired type for output
|
1239
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1242
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1240
1243
|
# rename the output columns
|
1241
1244
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1242
1245
|
self._model_signature_dict["predict"] = ModelSignature(
|