snowflake-ml-python 1.7.3__py3-none-any.whl → 1.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208) hide show
  1. snowflake/cortex/_complete.py +19 -0
  2. snowflake/ml/_internal/env_utils.py +64 -21
  3. snowflake/ml/_internal/platform_capabilities.py +87 -0
  4. snowflake/ml/_internal/relax_version_strategy.py +16 -0
  5. snowflake/ml/_internal/telemetry.py +21 -0
  6. snowflake/ml/data/_internal/arrow_ingestor.py +1 -1
  7. snowflake/ml/dataset/dataset.py +0 -1
  8. snowflake/ml/feature_store/feature_store.py +18 -0
  9. snowflake/ml/feature_store/feature_view.py +46 -1
  10. snowflake/ml/fileset/fileset.py +6 -0
  11. snowflake/ml/jobs/__init__.py +21 -0
  12. snowflake/ml/jobs/_utils/constants.py +57 -0
  13. snowflake/ml/jobs/_utils/payload_utils.py +438 -0
  14. snowflake/ml/jobs/_utils/spec_utils.py +296 -0
  15. snowflake/ml/jobs/_utils/types.py +39 -0
  16. snowflake/ml/jobs/decorators.py +71 -0
  17. snowflake/ml/jobs/job.py +113 -0
  18. snowflake/ml/jobs/manager.py +298 -0
  19. snowflake/ml/model/_client/ops/model_ops.py +11 -2
  20. snowflake/ml/model/_client/ops/service_ops.py +1 -11
  21. snowflake/ml/model/_client/sql/service.py +13 -6
  22. snowflake/ml/model/_packager/model_env/model_env.py +45 -28
  23. snowflake/ml/model/_packager/model_handlers/_utils.py +19 -6
  24. snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
  25. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +17 -0
  26. snowflake/ml/model/_packager/model_handlers/keras.py +230 -0
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +1 -0
  28. snowflake/ml/model/_packager/model_handlers/sklearn.py +28 -3
  29. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +74 -21
  30. snowflake/ml/model/_packager/model_handlers/tensorflow.py +27 -49
  31. snowflake/ml/model/_packager/model_handlers_migrator/tensorflow_migrator_2023_12_01.py +48 -0
  32. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -1
  33. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +3 -0
  34. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
  35. snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -1
  36. snowflake/ml/model/_packager/model_task/model_task_utils.py +5 -1
  37. snowflake/ml/model/_signatures/base_handler.py +1 -2
  38. snowflake/ml/model/_signatures/builtins_handler.py +2 -2
  39. snowflake/ml/model/_signatures/core.py +2 -2
  40. snowflake/ml/model/_signatures/numpy_handler.py +11 -12
  41. snowflake/ml/model/_signatures/pandas_handler.py +11 -9
  42. snowflake/ml/model/_signatures/pytorch_handler.py +3 -6
  43. snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
  44. snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
  45. snowflake/ml/model/model_signature.py +25 -4
  46. snowflake/ml/model/type_hints.py +15 -0
  47. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +14 -1
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
  51. snowflake/ml/modeling/cluster/birch.py +6 -3
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
  53. snowflake/ml/modeling/cluster/dbscan.py +6 -3
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
  55. snowflake/ml/modeling/cluster/k_means.py +6 -3
  56. snowflake/ml/modeling/cluster/mean_shift.py +6 -3
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
  58. snowflake/ml/modeling/cluster/optics.py +6 -3
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
  62. snowflake/ml/modeling/compose/column_transformer.py +6 -3
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
  69. snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
  70. snowflake/ml/modeling/covariance/oas.py +6 -3
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
  74. snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
  79. snowflake/ml/modeling/decomposition/pca.py +6 -3
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
  108. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
  110. snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
  111. snowflake/ml/modeling/impute/knn_imputer.py +6 -3
  112. snowflake/ml/modeling/impute/missing_indicator.py +6 -3
  113. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
  114. snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
  115. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
  116. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
  117. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
  118. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
  119. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
  120. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
  121. snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
  122. snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
  123. snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
  124. snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
  125. snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
  126. snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
  127. snowflake/ml/modeling/linear_model/lars.py +6 -3
  128. snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
  129. snowflake/ml/modeling/linear_model/lasso.py +6 -3
  130. snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
  131. snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
  132. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
  133. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
  134. snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
  135. snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
  136. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
  137. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
  138. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
  139. snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
  140. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
  141. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
  142. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
  143. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
  144. snowflake/ml/modeling/linear_model/perceptron.py +6 -3
  145. snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
  146. snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
  147. snowflake/ml/modeling/linear_model/ridge.py +6 -3
  148. snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
  149. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
  150. snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
  151. snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
  152. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
  153. snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
  154. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
  155. snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
  156. snowflake/ml/modeling/manifold/isomap.py +6 -3
  157. snowflake/ml/modeling/manifold/mds.py +6 -3
  158. snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
  159. snowflake/ml/modeling/manifold/tsne.py +6 -3
  160. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
  161. snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
  162. snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
  163. snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
  164. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
  165. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
  166. snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
  167. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
  168. snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
  169. snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
  170. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
  171. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
  172. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
  173. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
  174. snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
  175. snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
  176. snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
  177. snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
  178. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
  179. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
  180. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
  181. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
  182. snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
  183. snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
  184. snowflake/ml/modeling/pipeline/pipeline.py +28 -3
  185. snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -5
  186. snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
  187. snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
  188. snowflake/ml/modeling/svm/linear_svc.py +6 -3
  189. snowflake/ml/modeling/svm/linear_svr.py +6 -3
  190. snowflake/ml/modeling/svm/nu_svc.py +6 -3
  191. snowflake/ml/modeling/svm/nu_svr.py +6 -3
  192. snowflake/ml/modeling/svm/svc.py +6 -3
  193. snowflake/ml/modeling/svm/svr.py +6 -3
  194. snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
  195. snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
  196. snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
  197. snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
  198. snowflake/ml/modeling/xgboost/xgb_classifier.py +6 -3
  199. snowflake/ml/modeling/xgboost/xgb_regressor.py +6 -3
  200. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +6 -3
  201. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +6 -3
  202. snowflake/ml/registry/registry.py +34 -4
  203. snowflake/ml/version.py +1 -1
  204. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/METADATA +81 -33
  205. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/RECORD +208 -196
  206. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/WHEEL +1 -1
  207. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/LICENSE.txt +0 -0
  208. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/top_level.txt +0 -0
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class ElasticNet(BaseTransformer):
61
64
  r"""Linear regression with combined L1 and L2 priors as regularizer
62
65
  For more details on this class, see [sklearn.linear_model.ElasticNet]
@@ -476,7 +479,7 @@ class ElasticNet(BaseTransformer):
476
479
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
477
480
  expected_dtype = "array"
478
481
  else:
479
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
482
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
480
483
  # We can only infer the output types from the input types if the following two statemetns are true:
481
484
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
482
485
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1133,7 +1136,7 @@ class ElasticNet(BaseTransformer):
1133
1136
 
1134
1137
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1135
1138
 
1136
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1139
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1137
1140
  outputs: List[BaseFeatureSpec] = []
1138
1141
  if hasattr(self, "predict"):
1139
1142
  # keep mypy happy
@@ -1141,7 +1144,7 @@ class ElasticNet(BaseTransformer):
1141
1144
  # For classifier, the type of predict is the same as the type of label
1142
1145
  if self._sklearn_object._estimator_type == "classifier":
1143
1146
  # label columns is the desired type for output
1144
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1147
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1145
1148
  # rename the output columns
1146
1149
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1147
1150
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class ElasticNetCV(BaseTransformer):
61
64
  r"""Elastic Net model with iterative fitting along a regularization path
62
65
  For more details on this class, see [sklearn.linear_model.ElasticNetCV]
@@ -509,7 +512,7 @@ class ElasticNetCV(BaseTransformer):
509
512
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
510
513
  expected_dtype = "array"
511
514
  else:
512
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
515
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
513
516
  # We can only infer the output types from the input types if the following two statemetns are true:
514
517
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
515
518
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1166,7 +1169,7 @@ class ElasticNetCV(BaseTransformer):
1166
1169
 
1167
1170
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1168
1171
 
1169
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1172
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1170
1173
  outputs: List[BaseFeatureSpec] = []
1171
1174
  if hasattr(self, "predict"):
1172
1175
  # keep mypy happy
@@ -1174,7 +1177,7 @@ class ElasticNetCV(BaseTransformer):
1174
1177
  # For classifier, the type of predict is the same as the type of label
1175
1178
  if self._sklearn_object._estimator_type == "classifier":
1176
1179
  # label columns is the desired type for output
1177
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1180
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1178
1181
  # rename the output columns
1179
1182
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1180
1183
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class GammaRegressor(BaseTransformer):
61
64
  r"""Generalized Linear Model with a Gamma distribution
62
65
  For more details on this class, see [sklearn.linear_model.GammaRegressor]
@@ -454,7 +457,7 @@ class GammaRegressor(BaseTransformer):
454
457
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
455
458
  expected_dtype = "array"
456
459
  else:
457
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
460
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
458
461
  # We can only infer the output types from the input types if the following two statemetns are true:
459
462
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
460
463
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1111,7 +1114,7 @@ class GammaRegressor(BaseTransformer):
1111
1114
 
1112
1115
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1113
1116
 
1114
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1117
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1115
1118
  outputs: List[BaseFeatureSpec] = []
1116
1119
  if hasattr(self, "predict"):
1117
1120
  # keep mypy happy
@@ -1119,7 +1122,7 @@ class GammaRegressor(BaseTransformer):
1119
1122
  # For classifier, the type of predict is the same as the type of label
1120
1123
  if self._sklearn_object._estimator_type == "classifier":
1121
1124
  # label columns is the desired type for output
1122
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1125
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1123
1126
  # rename the output columns
1124
1127
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1125
1128
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class HuberRegressor(BaseTransformer):
61
64
  r"""L2-regularized linear regression model that is robust to outliers
62
65
  For more details on this class, see [sklearn.linear_model.HuberRegressor]
@@ -437,7 +440,7 @@ class HuberRegressor(BaseTransformer):
437
440
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
438
441
  expected_dtype = "array"
439
442
  else:
440
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
443
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
441
444
  # We can only infer the output types from the input types if the following two statemetns are true:
442
445
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
443
446
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1094,7 +1097,7 @@ class HuberRegressor(BaseTransformer):
1094
1097
 
1095
1098
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1096
1099
 
1097
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1100
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1098
1101
  outputs: List[BaseFeatureSpec] = []
1099
1102
  if hasattr(self, "predict"):
1100
1103
  # keep mypy happy
@@ -1102,7 +1105,7 @@ class HuberRegressor(BaseTransformer):
1102
1105
  # For classifier, the type of predict is the same as the type of label
1103
1106
  if self._sklearn_object._estimator_type == "classifier":
1104
1107
  # label columns is the desired type for output
1105
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1108
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1106
1109
  # rename the output columns
1107
1110
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1108
1111
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class Lars(BaseTransformer):
61
64
  r"""Least Angle Regression model a
62
65
  For more details on this class, see [sklearn.linear_model.Lars]
@@ -456,7 +459,7 @@ class Lars(BaseTransformer):
456
459
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
457
460
  expected_dtype = "array"
458
461
  else:
459
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
462
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
460
463
  # We can only infer the output types from the input types if the following two statemetns are true:
461
464
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
462
465
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1113,7 +1116,7 @@ class Lars(BaseTransformer):
1113
1116
 
1114
1117
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1115
1118
 
1116
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1119
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1117
1120
  outputs: List[BaseFeatureSpec] = []
1118
1121
  if hasattr(self, "predict"):
1119
1122
  # keep mypy happy
@@ -1121,7 +1124,7 @@ class Lars(BaseTransformer):
1121
1124
  # For classifier, the type of predict is the same as the type of label
1122
1125
  if self._sklearn_object._estimator_type == "classifier":
1123
1126
  # label columns is the desired type for output
1124
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1127
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1125
1128
  # rename the output columns
1126
1129
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1127
1130
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LarsCV(BaseTransformer):
61
64
  r"""Cross-validated Least Angle Regression model
62
65
  For more details on this class, see [sklearn.linear_model.LarsCV]
@@ -464,7 +467,7 @@ class LarsCV(BaseTransformer):
464
467
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
465
468
  expected_dtype = "array"
466
469
  else:
467
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
470
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
468
471
  # We can only infer the output types from the input types if the following two statemetns are true:
469
472
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
470
473
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1121,7 +1124,7 @@ class LarsCV(BaseTransformer):
1121
1124
 
1122
1125
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1123
1126
 
1124
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1127
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1125
1128
  outputs: List[BaseFeatureSpec] = []
1126
1129
  if hasattr(self, "predict"):
1127
1130
  # keep mypy happy
@@ -1129,7 +1132,7 @@ class LarsCV(BaseTransformer):
1129
1132
  # For classifier, the type of predict is the same as the type of label
1130
1133
  if self._sklearn_object._estimator_type == "classifier":
1131
1134
  # label columns is the desired type for output
1132
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1135
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1133
1136
  # rename the output columns
1134
1137
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1135
1138
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class Lasso(BaseTransformer):
61
64
  r"""Linear Model trained with L1 prior as regularizer (aka the Lasso)
62
65
  For more details on this class, see [sklearn.linear_model.Lasso]
@@ -467,7 +470,7 @@ class Lasso(BaseTransformer):
467
470
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
468
471
  expected_dtype = "array"
469
472
  else:
470
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
473
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
471
474
  # We can only infer the output types from the input types if the following two statemetns are true:
472
475
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
473
476
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1124,7 +1127,7 @@ class Lasso(BaseTransformer):
1124
1127
 
1125
1128
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1126
1129
 
1127
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1130
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1128
1131
  outputs: List[BaseFeatureSpec] = []
1129
1132
  if hasattr(self, "predict"):
1130
1133
  # keep mypy happy
@@ -1132,7 +1135,7 @@ class Lasso(BaseTransformer):
1132
1135
  # For classifier, the type of predict is the same as the type of label
1133
1136
  if self._sklearn_object._estimator_type == "classifier":
1134
1137
  # label columns is the desired type for output
1135
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1138
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1136
1139
  # rename the output columns
1137
1140
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1138
1141
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LassoCV(BaseTransformer):
61
64
  r"""Lasso linear model with iterative fitting along a regularization path
62
65
  For more details on this class, see [sklearn.linear_model.LassoCV]
@@ -495,7 +498,7 @@ class LassoCV(BaseTransformer):
495
498
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
496
499
  expected_dtype = "array"
497
500
  else:
498
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
501
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
499
502
  # We can only infer the output types from the input types if the following two statemetns are true:
500
503
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
501
504
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1152,7 +1155,7 @@ class LassoCV(BaseTransformer):
1152
1155
 
1153
1156
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1154
1157
 
1155
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1158
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1156
1159
  outputs: List[BaseFeatureSpec] = []
1157
1160
  if hasattr(self, "predict"):
1158
1161
  # keep mypy happy
@@ -1160,7 +1163,7 @@ class LassoCV(BaseTransformer):
1160
1163
  # For classifier, the type of predict is the same as the type of label
1161
1164
  if self._sklearn_object._estimator_type == "classifier":
1162
1165
  # label columns is the desired type for output
1163
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1166
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1164
1167
  # rename the output columns
1165
1168
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1166
1169
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LassoLars(BaseTransformer):
61
64
  r"""Lasso model fit with Least Angle Regression a
62
65
  For more details on this class, see [sklearn.linear_model.LassoLars]
@@ -477,7 +480,7 @@ class LassoLars(BaseTransformer):
477
480
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
478
481
  expected_dtype = "array"
479
482
  else:
480
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
483
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
481
484
  # We can only infer the output types from the input types if the following two statemetns are true:
482
485
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
483
486
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1134,7 +1137,7 @@ class LassoLars(BaseTransformer):
1134
1137
 
1135
1138
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1136
1139
 
1137
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1140
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1138
1141
  outputs: List[BaseFeatureSpec] = []
1139
1142
  if hasattr(self, "predict"):
1140
1143
  # keep mypy happy
@@ -1142,7 +1145,7 @@ class LassoLars(BaseTransformer):
1142
1145
  # For classifier, the type of predict is the same as the type of label
1143
1146
  if self._sklearn_object._estimator_type == "classifier":
1144
1147
  # label columns is the desired type for output
1145
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1148
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1146
1149
  # rename the output columns
1147
1150
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1148
1151
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LassoLarsCV(BaseTransformer):
61
64
  r"""Cross-validated Lasso, using the LARS algorithm
62
65
  For more details on this class, see [sklearn.linear_model.LassoLarsCV]
@@ -478,7 +481,7 @@ class LassoLarsCV(BaseTransformer):
478
481
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
479
482
  expected_dtype = "array"
480
483
  else:
481
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
484
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
482
485
  # We can only infer the output types from the input types if the following two statemetns are true:
483
486
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
484
487
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1135,7 +1138,7 @@ class LassoLarsCV(BaseTransformer):
1135
1138
 
1136
1139
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1137
1140
 
1138
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1141
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1139
1142
  outputs: List[BaseFeatureSpec] = []
1140
1143
  if hasattr(self, "predict"):
1141
1144
  # keep mypy happy
@@ -1143,7 +1146,7 @@ class LassoLarsCV(BaseTransformer):
1143
1146
  # For classifier, the type of predict is the same as the type of label
1144
1147
  if self._sklearn_object._estimator_type == "classifier":
1145
1148
  # label columns is the desired type for output
1146
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1149
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1147
1150
  # rename the output columns
1148
1151
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1149
1152
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LassoLarsIC(BaseTransformer):
61
64
  r"""Lasso model fit with Lars using BIC or AIC for model selection
62
65
  For more details on this class, see [sklearn.linear_model.LassoLarsIC]
@@ -461,7 +464,7 @@ class LassoLarsIC(BaseTransformer):
461
464
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
462
465
  expected_dtype = "array"
463
466
  else:
464
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
467
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
465
468
  # We can only infer the output types from the input types if the following two statemetns are true:
466
469
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
467
470
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1118,7 +1121,7 @@ class LassoLarsIC(BaseTransformer):
1118
1121
 
1119
1122
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1120
1123
 
1121
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1124
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1122
1125
  outputs: List[BaseFeatureSpec] = []
1123
1126
  if hasattr(self, "predict"):
1124
1127
  # keep mypy happy
@@ -1126,7 +1129,7 @@ class LassoLarsIC(BaseTransformer):
1126
1129
  # For classifier, the type of predict is the same as the type of label
1127
1130
  if self._sklearn_object._estimator_type == "classifier":
1128
1131
  # label columns is the desired type for output
1129
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1132
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1130
1133
  # rename the output columns
1131
1134
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1132
1135
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LinearRegression(BaseTransformer):
61
64
  r"""Ordinary least squares Linear Regression
62
65
  For more details on this class, see [sklearn.linear_model.LinearRegression]
@@ -424,7 +427,7 @@ class LinearRegression(BaseTransformer):
424
427
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
425
428
  expected_dtype = "array"
426
429
  else:
427
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
430
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
428
431
  # We can only infer the output types from the input types if the following two statemetns are true:
429
432
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
430
433
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1081,7 +1084,7 @@ class LinearRegression(BaseTransformer):
1081
1084
 
1082
1085
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1083
1086
 
1084
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1087
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1085
1088
  outputs: List[BaseFeatureSpec] = []
1086
1089
  if hasattr(self, "predict"):
1087
1090
  # keep mypy happy
@@ -1089,7 +1092,7 @@ class LinearRegression(BaseTransformer):
1089
1092
  # For classifier, the type of predict is the same as the type of label
1090
1093
  if self._sklearn_object._estimator_type == "classifier":
1091
1094
  # label columns is the desired type for output
1092
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1095
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1093
1096
  # rename the output columns
1094
1097
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1095
1098
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LogisticRegression(BaseTransformer):
61
64
  r"""Logistic Regression (aka logit, MaxEnt) classifier
62
65
  For more details on this class, see [sklearn.linear_model.LogisticRegression]
@@ -544,7 +547,7 @@ class LogisticRegression(BaseTransformer):
544
547
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
545
548
  expected_dtype = "array"
546
549
  else:
547
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
550
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
548
551
  # We can only infer the output types from the input types if the following two statemetns are true:
549
552
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
550
553
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1207,7 +1210,7 @@ class LogisticRegression(BaseTransformer):
1207
1210
 
1208
1211
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1209
1212
 
1210
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1213
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1211
1214
  outputs: List[BaseFeatureSpec] = []
1212
1215
  if hasattr(self, "predict"):
1213
1216
  # keep mypy happy
@@ -1215,7 +1218,7 @@ class LogisticRegression(BaseTransformer):
1215
1218
  # For classifier, the type of predict is the same as the type of label
1216
1219
  if self._sklearn_object._estimator_type == "classifier":
1217
1220
  # label columns is the desired type for output
1218
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1221
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1219
1222
  # rename the output columns
1220
1223
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1221
1224
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LogisticRegressionCV(BaseTransformer):
61
64
  r"""Logistic Regression CV (aka logit, MaxEnt) classifier
62
65
  For more details on this class, see [sklearn.linear_model.LogisticRegressionCV]
@@ -565,7 +568,7 @@ class LogisticRegressionCV(BaseTransformer):
565
568
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
566
569
  expected_dtype = "array"
567
570
  else:
568
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
571
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
569
572
  # We can only infer the output types from the input types if the following two statemetns are true:
570
573
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
571
574
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1228,7 +1231,7 @@ class LogisticRegressionCV(BaseTransformer):
1228
1231
 
1229
1232
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1230
1233
 
1231
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1234
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1232
1235
  outputs: List[BaseFeatureSpec] = []
1233
1236
  if hasattr(self, "predict"):
1234
1237
  # keep mypy happy
@@ -1236,7 +1239,7 @@ class LogisticRegressionCV(BaseTransformer):
1236
1239
  # For classifier, the type of predict is the same as the type of label
1237
1240
  if self._sklearn_object._estimator_type == "classifier":
1238
1241
  # label columns is the desired type for output
1239
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1242
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1240
1243
  # rename the output columns
1241
1244
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1242
1245
  self._model_signature_dict["predict"] = ModelSignature(