snowflake-ml-python 1.7.3__py3-none-any.whl → 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. snowflake/cortex/_complete.py +19 -0
  2. snowflake/ml/_internal/platform_capabilities.py +87 -0
  3. snowflake/ml/dataset/dataset.py +0 -1
  4. snowflake/ml/fileset/fileset.py +6 -0
  5. snowflake/ml/jobs/__init__.py +21 -0
  6. snowflake/ml/jobs/_utils/constants.py +51 -0
  7. snowflake/ml/jobs/_utils/payload_utils.py +352 -0
  8. snowflake/ml/jobs/_utils/spec_utils.py +298 -0
  9. snowflake/ml/jobs/_utils/types.py +39 -0
  10. snowflake/ml/jobs/decorators.py +91 -0
  11. snowflake/ml/jobs/job.py +113 -0
  12. snowflake/ml/jobs/manager.py +298 -0
  13. snowflake/ml/model/_client/ops/model_ops.py +11 -2
  14. snowflake/ml/model/_client/ops/service_ops.py +1 -11
  15. snowflake/ml/model/_client/sql/service.py +13 -6
  16. snowflake/ml/model/_packager/model_handlers/_utils.py +12 -3
  17. snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
  18. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +1 -0
  19. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
  20. snowflake/ml/model/_signatures/base_handler.py +1 -2
  21. snowflake/ml/model/_signatures/builtins_handler.py +2 -2
  22. snowflake/ml/model/_signatures/numpy_handler.py +6 -7
  23. snowflake/ml/model/_signatures/pandas_handler.py +2 -2
  24. snowflake/ml/model/_signatures/pytorch_handler.py +2 -5
  25. snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
  26. snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
  27. snowflake/ml/model/model_signature.py +17 -4
  28. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
  29. snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
  30. snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
  31. snowflake/ml/modeling/cluster/birch.py +6 -3
  32. snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
  33. snowflake/ml/modeling/cluster/dbscan.py +6 -3
  34. snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
  35. snowflake/ml/modeling/cluster/k_means.py +6 -3
  36. snowflake/ml/modeling/cluster/mean_shift.py +6 -3
  37. snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
  38. snowflake/ml/modeling/cluster/optics.py +6 -3
  39. snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
  40. snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
  41. snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
  42. snowflake/ml/modeling/compose/column_transformer.py +6 -3
  43. snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
  44. snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
  45. snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
  46. snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
  47. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
  48. snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
  49. snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
  50. snowflake/ml/modeling/covariance/oas.py +6 -3
  51. snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
  52. snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
  53. snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
  54. snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
  55. snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
  56. snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
  57. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
  58. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
  59. snowflake/ml/modeling/decomposition/pca.py +6 -3
  60. snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
  61. snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
  62. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
  63. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
  64. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
  65. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
  66. snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
  67. snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
  68. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
  69. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
  70. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
  71. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
  72. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
  73. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
  74. snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
  75. snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
  76. snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
  77. snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
  78. snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
  79. snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
  80. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
  81. snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
  82. snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
  83. snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
  84. snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
  85. snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
  86. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
  87. snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
  88. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
  89. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
  90. snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
  91. snowflake/ml/modeling/impute/knn_imputer.py +6 -3
  92. snowflake/ml/modeling/impute/missing_indicator.py +6 -3
  93. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
  94. snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
  95. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
  96. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
  97. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
  98. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
  99. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
  100. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
  101. snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
  102. snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
  103. snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
  104. snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
  105. snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
  106. snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
  107. snowflake/ml/modeling/linear_model/lars.py +6 -3
  108. snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
  109. snowflake/ml/modeling/linear_model/lasso.py +6 -3
  110. snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
  111. snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
  112. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
  113. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
  114. snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
  115. snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
  116. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
  117. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
  118. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
  119. snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
  120. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
  121. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
  122. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
  123. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
  124. snowflake/ml/modeling/linear_model/perceptron.py +6 -3
  125. snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
  126. snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
  127. snowflake/ml/modeling/linear_model/ridge.py +6 -3
  128. snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
  129. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
  130. snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
  131. snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
  132. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
  133. snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
  134. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
  135. snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
  136. snowflake/ml/modeling/manifold/isomap.py +6 -3
  137. snowflake/ml/modeling/manifold/mds.py +6 -3
  138. snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
  139. snowflake/ml/modeling/manifold/tsne.py +6 -3
  140. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
  141. snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
  142. snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
  143. snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
  144. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
  145. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
  146. snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
  147. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
  148. snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
  149. snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
  150. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
  151. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
  152. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
  153. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
  154. snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
  155. snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
  156. snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
  157. snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
  158. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
  159. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
  160. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
  161. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
  162. snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
  163. snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
  164. snowflake/ml/modeling/pipeline/pipeline.py +10 -2
  165. snowflake/ml/modeling/preprocessing/polynomial_features.py +6 -3
  166. snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
  167. snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
  168. snowflake/ml/modeling/svm/linear_svc.py +6 -3
  169. snowflake/ml/modeling/svm/linear_svr.py +6 -3
  170. snowflake/ml/modeling/svm/nu_svc.py +6 -3
  171. snowflake/ml/modeling/svm/nu_svr.py +6 -3
  172. snowflake/ml/modeling/svm/svc.py +6 -3
  173. snowflake/ml/modeling/svm/svr.py +6 -3
  174. snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
  175. snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
  176. snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
  177. snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
  178. snowflake/ml/modeling/xgboost/xgb_classifier.py +6 -3
  179. snowflake/ml/modeling/xgboost/xgb_regressor.py +6 -3
  180. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +6 -3
  181. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +6 -3
  182. snowflake/ml/version.py +1 -1
  183. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/METADATA +29 -14
  184. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/RECORD +187 -178
  185. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/LICENSE.txt +0 -0
  186. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/WHEEL +0 -0
  187. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/top_level.txt +0 -0
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.kernel_ridge".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class KernelRidge(BaseTransformer):
61
64
  r"""Kernel ridge regression
62
65
  For more details on this class, see [sklearn.kernel_ridge.KernelRidge]
@@ -449,7 +452,7 @@ class KernelRidge(BaseTransformer):
449
452
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
450
453
  expected_dtype = "array"
451
454
  else:
452
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
455
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
453
456
  # We can only infer the output types from the input types if the following two statements are true:
454
457
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
455
458
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1106,7 +1109,7 @@ class KernelRidge(BaseTransformer):
1106
1109
 
1107
1110
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1108
1111
 
1109
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1112
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1110
1113
  outputs: List[BaseFeatureSpec] = []
1111
1114
  if hasattr(self, "predict"):
1112
1115
  # keep mypy happy
@@ -1114,7 +1117,7 @@ class KernelRidge(BaseTransformer):
1114
1117
  # For classifier, the type of predict is the same as the type of label
1115
1118
  if self._sklearn_object._estimator_type == "classifier":
1116
1119
  # label columns is the desired type for output
1117
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1120
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1118
1121
  # rename the output columns
1119
1122
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1120
1123
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "lightgbm".replace("sklearn.", ""
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LGBMClassifier(BaseTransformer):
61
64
  r"""LightGBM classifier
62
65
  For more details on this class, see [lightgbm.LGBMClassifier]
@@ -436,7 +439,7 @@ class LGBMClassifier(BaseTransformer):
436
439
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
437
440
  expected_dtype = "array"
438
441
  else:
439
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
442
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
440
443
  # We can only infer the output types from the input types if the following two statements are true:
441
444
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
442
445
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1097,7 +1100,7 @@ class LGBMClassifier(BaseTransformer):
1097
1100
 
1098
1101
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1099
1102
 
1100
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1103
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1101
1104
  outputs: List[BaseFeatureSpec] = []
1102
1105
  if hasattr(self, "predict"):
1103
1106
  # keep mypy happy
@@ -1105,7 +1108,7 @@ class LGBMClassifier(BaseTransformer):
1105
1108
  # For classifier, the type of predict is the same as the type of label
1106
1109
  if self._sklearn_object._estimator_type == "classifier":
1107
1110
  # label columns is the desired type for output
1108
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1111
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1109
1112
  # rename the output columns
1110
1113
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1111
1114
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "lightgbm".replace("sklearn.", ""
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LGBMRegressor(BaseTransformer):
61
64
  r"""LightGBM regressor
62
65
  For more details on this class, see [lightgbm.LGBMRegressor]
@@ -436,7 +439,7 @@ class LGBMRegressor(BaseTransformer):
436
439
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
437
440
  expected_dtype = "array"
438
441
  else:
439
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
442
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
440
443
  # We can only infer the output types from the input types if the following two statements are true:
441
444
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
442
445
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1093,7 +1096,7 @@ class LGBMRegressor(BaseTransformer):
1093
1096
 
1094
1097
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1095
1098
 
1096
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1099
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1097
1100
  outputs: List[BaseFeatureSpec] = []
1098
1101
  if hasattr(self, "predict"):
1099
1102
  # keep mypy happy
@@ -1101,7 +1104,7 @@ class LGBMRegressor(BaseTransformer):
1101
1104
  # For classifier, the type of predict is the same as the type of label
1102
1105
  if self._sklearn_object._estimator_type == "classifier":
1103
1106
  # label columns is the desired type for output
1104
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1107
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1105
1108
  # rename the output columns
1106
1109
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1107
1110
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class ARDRegression(BaseTransformer):
61
64
  r"""Bayesian ARD regression
62
65
  For more details on this class, see [sklearn.linear_model.ARDRegression]
@@ -458,7 +461,7 @@ class ARDRegression(BaseTransformer):
458
461
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
459
462
  expected_dtype = "array"
460
463
  else:
461
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
464
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
462
465
  # We can only infer the output types from the input types if the following two statements are true:
463
466
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
464
467
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1115,7 +1118,7 @@ class ARDRegression(BaseTransformer):
1115
1118
 
1116
1119
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1117
1120
 
1118
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1121
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1119
1122
  outputs: List[BaseFeatureSpec] = []
1120
1123
  if hasattr(self, "predict"):
1121
1124
  # keep mypy happy
@@ -1123,7 +1126,7 @@ class ARDRegression(BaseTransformer):
1123
1126
  # For classifier, the type of predict is the same as the type of label
1124
1127
  if self._sklearn_object._estimator_type == "classifier":
1125
1128
  # label columns is the desired type for output
1126
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1129
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1127
1130
  # rename the output columns
1128
1131
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1129
1132
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class BayesianRidge(BaseTransformer):
61
64
  r"""Bayesian ridge regression
62
65
  For more details on this class, see [sklearn.linear_model.BayesianRidge]
@@ -468,7 +471,7 @@ class BayesianRidge(BaseTransformer):
468
471
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
469
472
  expected_dtype = "array"
470
473
  else:
471
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
474
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
472
475
  # We can only infer the output types from the input types if the following two statements are true:
473
476
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
474
477
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1125,7 +1128,7 @@ class BayesianRidge(BaseTransformer):
1125
1128
 
1126
1129
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1127
1130
 
1128
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1131
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1129
1132
  outputs: List[BaseFeatureSpec] = []
1130
1133
  if hasattr(self, "predict"):
1131
1134
  # keep mypy happy
@@ -1133,7 +1136,7 @@ class BayesianRidge(BaseTransformer):
1133
1136
  # For classifier, the type of predict is the same as the type of label
1134
1137
  if self._sklearn_object._estimator_type == "classifier":
1135
1138
  # label columns is the desired type for output
1136
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1139
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1137
1140
  # rename the output columns
1138
1141
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1139
1142
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class ElasticNet(BaseTransformer):
61
64
  r"""Linear regression with combined L1 and L2 priors as regularizer
62
65
  For more details on this class, see [sklearn.linear_model.ElasticNet]
@@ -476,7 +479,7 @@ class ElasticNet(BaseTransformer):
476
479
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
477
480
  expected_dtype = "array"
478
481
  else:
479
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
482
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
480
483
  # We can only infer the output types from the input types if the following two statements are true:
481
484
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
482
485
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1133,7 +1136,7 @@ class ElasticNet(BaseTransformer):
1133
1136
 
1134
1137
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1135
1138
 
1136
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1139
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1137
1140
  outputs: List[BaseFeatureSpec] = []
1138
1141
  if hasattr(self, "predict"):
1139
1142
  # keep mypy happy
@@ -1141,7 +1144,7 @@ class ElasticNet(BaseTransformer):
1141
1144
  # For classifier, the type of predict is the same as the type of label
1142
1145
  if self._sklearn_object._estimator_type == "classifier":
1143
1146
  # label columns is the desired type for output
1144
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1147
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1145
1148
  # rename the output columns
1146
1149
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1147
1150
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class ElasticNetCV(BaseTransformer):
61
64
  r"""Elastic Net model with iterative fitting along a regularization path
62
65
  For more details on this class, see [sklearn.linear_model.ElasticNetCV]
@@ -509,7 +512,7 @@ class ElasticNetCV(BaseTransformer):
509
512
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
510
513
  expected_dtype = "array"
511
514
  else:
512
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
515
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
513
516
  # We can only infer the output types from the input types if the following two statements are true:
514
517
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
515
518
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1166,7 +1169,7 @@ class ElasticNetCV(BaseTransformer):
1166
1169
 
1167
1170
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1168
1171
 
1169
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1172
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1170
1173
  outputs: List[BaseFeatureSpec] = []
1171
1174
  if hasattr(self, "predict"):
1172
1175
  # keep mypy happy
@@ -1174,7 +1177,7 @@ class ElasticNetCV(BaseTransformer):
1174
1177
  # For classifier, the type of predict is the same as the type of label
1175
1178
  if self._sklearn_object._estimator_type == "classifier":
1176
1179
  # label columns is the desired type for output
1177
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1180
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1178
1181
  # rename the output columns
1179
1182
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1180
1183
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class GammaRegressor(BaseTransformer):
61
64
  r"""Generalized Linear Model with a Gamma distribution
62
65
  For more details on this class, see [sklearn.linear_model.GammaRegressor]
@@ -454,7 +457,7 @@ class GammaRegressor(BaseTransformer):
454
457
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
455
458
  expected_dtype = "array"
456
459
  else:
457
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
460
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
458
461
  # We can only infer the output types from the input types if the following two statements are true:
459
462
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
460
463
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1111,7 +1114,7 @@ class GammaRegressor(BaseTransformer):
1111
1114
 
1112
1115
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1113
1116
 
1114
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1117
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1115
1118
  outputs: List[BaseFeatureSpec] = []
1116
1119
  if hasattr(self, "predict"):
1117
1120
  # keep mypy happy
@@ -1119,7 +1122,7 @@ class GammaRegressor(BaseTransformer):
1119
1122
  # For classifier, the type of predict is the same as the type of label
1120
1123
  if self._sklearn_object._estimator_type == "classifier":
1121
1124
  # label columns is the desired type for output
1122
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1125
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1123
1126
  # rename the output columns
1124
1127
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1125
1128
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class HuberRegressor(BaseTransformer):
61
64
  r"""L2-regularized linear regression model that is robust to outliers
62
65
  For more details on this class, see [sklearn.linear_model.HuberRegressor]
@@ -437,7 +440,7 @@ class HuberRegressor(BaseTransformer):
437
440
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
438
441
  expected_dtype = "array"
439
442
  else:
440
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
443
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
441
444
  # We can only infer the output types from the input types if the following two statements are true:
442
445
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
443
446
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1094,7 +1097,7 @@ class HuberRegressor(BaseTransformer):
1094
1097
 
1095
1098
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1096
1099
 
1097
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1100
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1098
1101
  outputs: List[BaseFeatureSpec] = []
1099
1102
  if hasattr(self, "predict"):
1100
1103
  # keep mypy happy
@@ -1102,7 +1105,7 @@ class HuberRegressor(BaseTransformer):
1102
1105
  # For classifier, the type of predict is the same as the type of label
1103
1106
  if self._sklearn_object._estimator_type == "classifier":
1104
1107
  # label columns is the desired type for output
1105
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1108
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1106
1109
  # rename the output columns
1107
1110
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1108
1111
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class Lars(BaseTransformer):
61
64
  r"""Least Angle Regression model a
62
65
  For more details on this class, see [sklearn.linear_model.Lars]
@@ -456,7 +459,7 @@ class Lars(BaseTransformer):
456
459
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
457
460
  expected_dtype = "array"
458
461
  else:
459
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
462
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
460
463
  # We can only infer the output types from the input types if the following two statements are true:
461
464
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
462
465
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1113,7 +1116,7 @@ class Lars(BaseTransformer):
1113
1116
 
1114
1117
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1115
1118
 
1116
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1119
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1117
1120
  outputs: List[BaseFeatureSpec] = []
1118
1121
  if hasattr(self, "predict"):
1119
1122
  # keep mypy happy
@@ -1121,7 +1124,7 @@ class Lars(BaseTransformer):
1121
1124
  # For classifier, the type of predict is the same as the type of label
1122
1125
  if self._sklearn_object._estimator_type == "classifier":
1123
1126
  # label columns is the desired type for output
1124
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1127
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1125
1128
  # rename the output columns
1126
1129
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1127
1130
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LarsCV(BaseTransformer):
61
64
  r"""Cross-validated Least Angle Regression model
62
65
  For more details on this class, see [sklearn.linear_model.LarsCV]
@@ -464,7 +467,7 @@ class LarsCV(BaseTransformer):
464
467
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
465
468
  expected_dtype = "array"
466
469
  else:
467
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
470
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
468
471
  # We can only infer the output types from the input types if the following two statements are true:
469
472
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
470
473
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1121,7 +1124,7 @@ class LarsCV(BaseTransformer):
1121
1124
 
1122
1125
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1123
1126
 
1124
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1127
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1125
1128
  outputs: List[BaseFeatureSpec] = []
1126
1129
  if hasattr(self, "predict"):
1127
1130
  # keep mypy happy
@@ -1129,7 +1132,7 @@ class LarsCV(BaseTransformer):
1129
1132
  # For classifier, the type of predict is the same as the type of label
1130
1133
  if self._sklearn_object._estimator_type == "classifier":
1131
1134
  # label columns is the desired type for output
1132
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1135
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1133
1136
  # rename the output columns
1134
1137
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1135
1138
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class Lasso(BaseTransformer):
61
64
  r"""Linear Model trained with L1 prior as regularizer (aka the Lasso)
62
65
  For more details on this class, see [sklearn.linear_model.Lasso]
@@ -467,7 +470,7 @@ class Lasso(BaseTransformer):
467
470
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
468
471
  expected_dtype = "array"
469
472
  else:
470
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
473
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
471
474
  # We can only infer the output types from the input types if the following two statements are true:
472
475
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
473
476
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1124,7 +1127,7 @@ class Lasso(BaseTransformer):
1124
1127
 
1125
1128
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1126
1129
 
1127
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1130
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1128
1131
  outputs: List[BaseFeatureSpec] = []
1129
1132
  if hasattr(self, "predict"):
1130
1133
  # keep mypy happy
@@ -1132,7 +1135,7 @@ class Lasso(BaseTransformer):
1132
1135
  # For classifier, the type of predict is the same as the type of label
1133
1136
  if self._sklearn_object._estimator_type == "classifier":
1134
1137
  # label columns is the desired type for output
1135
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1138
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1136
1139
  # rename the output columns
1137
1140
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1138
1141
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LassoCV(BaseTransformer):
61
64
  r"""Lasso linear model with iterative fitting along a regularization path
62
65
  For more details on this class, see [sklearn.linear_model.LassoCV]
@@ -495,7 +498,7 @@ class LassoCV(BaseTransformer):
495
498
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
496
499
  expected_dtype = "array"
497
500
  else:
498
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
501
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
499
502
  # We can only infer the output types from the input types if the following two statements are true:
500
503
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
501
504
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1152,7 +1155,7 @@ class LassoCV(BaseTransformer):
1152
1155
 
1153
1156
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1154
1157
 
1155
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1158
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1156
1159
  outputs: List[BaseFeatureSpec] = []
1157
1160
  if hasattr(self, "predict"):
1158
1161
  # keep mypy happy
@@ -1160,7 +1163,7 @@ class LassoCV(BaseTransformer):
1160
1163
  # For classifier, the type of predict is the same as the type of label
1161
1164
  if self._sklearn_object._estimator_type == "classifier":
1162
1165
  # label columns is the desired type for output
1163
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1166
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1164
1167
  # rename the output columns
1165
1168
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1166
1169
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LassoLars(BaseTransformer):
61
64
  r"""Lasso model fit with Least Angle Regression a
62
65
  For more details on this class, see [sklearn.linear_model.LassoLars]
@@ -477,7 +480,7 @@ class LassoLars(BaseTransformer):
477
480
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
478
481
  expected_dtype = "array"
479
482
  else:
480
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
483
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
481
484
  # We can only infer the output types from the input types if the following two statements are true:
482
485
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
483
486
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1134,7 +1137,7 @@ class LassoLars(BaseTransformer):
1134
1137
 
1135
1138
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1136
1139
 
1137
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1140
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1138
1141
  outputs: List[BaseFeatureSpec] = []
1139
1142
  if hasattr(self, "predict"):
1140
1143
  # keep mypy happy
@@ -1142,7 +1145,7 @@ class LassoLars(BaseTransformer):
1142
1145
  # For classifier, the type of predict is the same as the type of label
1143
1146
  if self._sklearn_object._estimator_type == "classifier":
1144
1147
  # label columns is the desired type for output
1145
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1148
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1146
1149
  # rename the output columns
1147
1150
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1148
1151
  self._model_signature_dict["predict"] = ModelSignature(