snowflake-ml-python 1.7.3__py3-none-any.whl → 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. snowflake/cortex/_complete.py +19 -0
  2. snowflake/ml/_internal/platform_capabilities.py +87 -0
  3. snowflake/ml/dataset/dataset.py +0 -1
  4. snowflake/ml/fileset/fileset.py +6 -0
  5. snowflake/ml/jobs/__init__.py +21 -0
  6. snowflake/ml/jobs/_utils/constants.py +51 -0
  7. snowflake/ml/jobs/_utils/payload_utils.py +352 -0
  8. snowflake/ml/jobs/_utils/spec_utils.py +298 -0
  9. snowflake/ml/jobs/_utils/types.py +39 -0
  10. snowflake/ml/jobs/decorators.py +91 -0
  11. snowflake/ml/jobs/job.py +113 -0
  12. snowflake/ml/jobs/manager.py +298 -0
  13. snowflake/ml/model/_client/ops/model_ops.py +11 -2
  14. snowflake/ml/model/_client/ops/service_ops.py +1 -11
  15. snowflake/ml/model/_client/sql/service.py +13 -6
  16. snowflake/ml/model/_packager/model_handlers/_utils.py +12 -3
  17. snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
  18. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +1 -0
  19. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
  20. snowflake/ml/model/_signatures/base_handler.py +1 -2
  21. snowflake/ml/model/_signatures/builtins_handler.py +2 -2
  22. snowflake/ml/model/_signatures/numpy_handler.py +6 -7
  23. snowflake/ml/model/_signatures/pandas_handler.py +2 -2
  24. snowflake/ml/model/_signatures/pytorch_handler.py +2 -5
  25. snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
  26. snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
  27. snowflake/ml/model/model_signature.py +17 -4
  28. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
  29. snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
  30. snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
  31. snowflake/ml/modeling/cluster/birch.py +6 -3
  32. snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
  33. snowflake/ml/modeling/cluster/dbscan.py +6 -3
  34. snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
  35. snowflake/ml/modeling/cluster/k_means.py +6 -3
  36. snowflake/ml/modeling/cluster/mean_shift.py +6 -3
  37. snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
  38. snowflake/ml/modeling/cluster/optics.py +6 -3
  39. snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
  40. snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
  41. snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
  42. snowflake/ml/modeling/compose/column_transformer.py +6 -3
  43. snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
  44. snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
  45. snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
  46. snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
  47. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
  48. snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
  49. snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
  50. snowflake/ml/modeling/covariance/oas.py +6 -3
  51. snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
  52. snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
  53. snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
  54. snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
  55. snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
  56. snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
  57. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
  58. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
  59. snowflake/ml/modeling/decomposition/pca.py +6 -3
  60. snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
  61. snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
  62. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
  63. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
  64. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
  65. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
  66. snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
  67. snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
  68. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
  69. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
  70. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
  71. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
  72. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
  73. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
  74. snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
  75. snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
  76. snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
  77. snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
  78. snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
  79. snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
  80. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
  81. snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
  82. snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
  83. snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
  84. snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
  85. snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
  86. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
  87. snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
  88. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
  89. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
  90. snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
  91. snowflake/ml/modeling/impute/knn_imputer.py +6 -3
  92. snowflake/ml/modeling/impute/missing_indicator.py +6 -3
  93. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
  94. snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
  95. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
  96. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
  97. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
  98. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
  99. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
  100. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
  101. snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
  102. snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
  103. snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
  104. snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
  105. snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
  106. snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
  107. snowflake/ml/modeling/linear_model/lars.py +6 -3
  108. snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
  109. snowflake/ml/modeling/linear_model/lasso.py +6 -3
  110. snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
  111. snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
  112. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
  113. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
  114. snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
  115. snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
  116. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
  117. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
  118. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
  119. snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
  120. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
  121. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
  122. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
  123. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
  124. snowflake/ml/modeling/linear_model/perceptron.py +6 -3
  125. snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
  126. snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
  127. snowflake/ml/modeling/linear_model/ridge.py +6 -3
  128. snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
  129. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
  130. snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
  131. snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
  132. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
  133. snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
  134. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
  135. snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
  136. snowflake/ml/modeling/manifold/isomap.py +6 -3
  137. snowflake/ml/modeling/manifold/mds.py +6 -3
  138. snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
  139. snowflake/ml/modeling/manifold/tsne.py +6 -3
  140. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
  141. snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
  142. snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
  143. snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
  144. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
  145. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
  146. snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
  147. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
  148. snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
  149. snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
  150. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
  151. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
  152. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
  153. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
  154. snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
  155. snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
  156. snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
  157. snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
  158. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
  159. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
  160. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
  161. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
  162. snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
  163. snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
  164. snowflake/ml/modeling/pipeline/pipeline.py +10 -2
  165. snowflake/ml/modeling/preprocessing/polynomial_features.py +6 -3
  166. snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
  167. snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
  168. snowflake/ml/modeling/svm/linear_svc.py +6 -3
  169. snowflake/ml/modeling/svm/linear_svr.py +6 -3
  170. snowflake/ml/modeling/svm/nu_svc.py +6 -3
  171. snowflake/ml/modeling/svm/nu_svr.py +6 -3
  172. snowflake/ml/modeling/svm/svc.py +6 -3
  173. snowflake/ml/modeling/svm/svr.py +6 -3
  174. snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
  175. snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
  176. snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
  177. snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
  178. snowflake/ml/modeling/xgboost/xgb_classifier.py +6 -3
  179. snowflake/ml/modeling/xgboost/xgb_regressor.py +6 -3
  180. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +6 -3
  181. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +6 -3
  182. snowflake/ml/version.py +1 -1
  183. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/METADATA +29 -14
  184. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/RECORD +187 -178
  185. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/LICENSE.txt +0 -0
  186. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/WHEEL +0 -0
  187. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/top_level.txt +0 -0
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LassoLarsCV(BaseTransformer):
61
64
  r"""Cross-validated Lasso, using the LARS algorithm
62
65
  For more details on this class, see [sklearn.linear_model.LassoLarsCV]
@@ -478,7 +481,7 @@ class LassoLarsCV(BaseTransformer):
478
481
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
479
482
  expected_dtype = "array"
480
483
  else:
481
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
484
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
482
485
  # We can only infer the output types from the input types if the following two statemetns are true:
483
486
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
484
487
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1135,7 +1138,7 @@ class LassoLarsCV(BaseTransformer):
1135
1138
 
1136
1139
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1137
1140
 
1138
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1141
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1139
1142
  outputs: List[BaseFeatureSpec] = []
1140
1143
  if hasattr(self, "predict"):
1141
1144
  # keep mypy happy
@@ -1143,7 +1146,7 @@ class LassoLarsCV(BaseTransformer):
1143
1146
  # For classifier, the type of predict is the same as the type of label
1144
1147
  if self._sklearn_object._estimator_type == "classifier":
1145
1148
  # label columns is the desired type for output
1146
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1149
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1147
1150
  # rename the output columns
1148
1151
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1149
1152
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LassoLarsIC(BaseTransformer):
61
64
  r"""Lasso model fit with Lars using BIC or AIC for model selection
62
65
  For more details on this class, see [sklearn.linear_model.LassoLarsIC]
@@ -461,7 +464,7 @@ class LassoLarsIC(BaseTransformer):
461
464
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
462
465
  expected_dtype = "array"
463
466
  else:
464
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
467
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
465
468
  # We can only infer the output types from the input types if the following two statemetns are true:
466
469
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
467
470
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1118,7 +1121,7 @@ class LassoLarsIC(BaseTransformer):
1118
1121
 
1119
1122
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1120
1123
 
1121
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1124
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1122
1125
  outputs: List[BaseFeatureSpec] = []
1123
1126
  if hasattr(self, "predict"):
1124
1127
  # keep mypy happy
@@ -1126,7 +1129,7 @@ class LassoLarsIC(BaseTransformer):
1126
1129
  # For classifier, the type of predict is the same as the type of label
1127
1130
  if self._sklearn_object._estimator_type == "classifier":
1128
1131
  # label columns is the desired type for output
1129
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1132
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1130
1133
  # rename the output columns
1131
1134
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1132
1135
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LinearRegression(BaseTransformer):
61
64
  r"""Ordinary least squares Linear Regression
62
65
  For more details on this class, see [sklearn.linear_model.LinearRegression]
@@ -424,7 +427,7 @@ class LinearRegression(BaseTransformer):
424
427
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
425
428
  expected_dtype = "array"
426
429
  else:
427
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
430
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
428
431
  # We can only infer the output types from the input types if the following two statemetns are true:
429
432
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
430
433
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1081,7 +1084,7 @@ class LinearRegression(BaseTransformer):
1081
1084
 
1082
1085
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1083
1086
 
1084
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1087
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1085
1088
  outputs: List[BaseFeatureSpec] = []
1086
1089
  if hasattr(self, "predict"):
1087
1090
  # keep mypy happy
@@ -1089,7 +1092,7 @@ class LinearRegression(BaseTransformer):
1089
1092
  # For classifier, the type of predict is the same as the type of label
1090
1093
  if self._sklearn_object._estimator_type == "classifier":
1091
1094
  # label columns is the desired type for output
1092
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1095
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1093
1096
  # rename the output columns
1094
1097
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1095
1098
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LogisticRegression(BaseTransformer):
61
64
  r"""Logistic Regression (aka logit, MaxEnt) classifier
62
65
  For more details on this class, see [sklearn.linear_model.LogisticRegression]
@@ -544,7 +547,7 @@ class LogisticRegression(BaseTransformer):
544
547
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
545
548
  expected_dtype = "array"
546
549
  else:
547
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
550
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
548
551
  # We can only infer the output types from the input types if the following two statemetns are true:
549
552
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
550
553
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1207,7 +1210,7 @@ class LogisticRegression(BaseTransformer):
1207
1210
 
1208
1211
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1209
1212
 
1210
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1213
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1211
1214
  outputs: List[BaseFeatureSpec] = []
1212
1215
  if hasattr(self, "predict"):
1213
1216
  # keep mypy happy
@@ -1215,7 +1218,7 @@ class LogisticRegression(BaseTransformer):
1215
1218
  # For classifier, the type of predict is the same as the type of label
1216
1219
  if self._sklearn_object._estimator_type == "classifier":
1217
1220
  # label columns is the desired type for output
1218
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1221
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1219
1222
  # rename the output columns
1220
1223
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1221
1224
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class LogisticRegressionCV(BaseTransformer):
61
64
  r"""Logistic Regression CV (aka logit, MaxEnt) classifier
62
65
  For more details on this class, see [sklearn.linear_model.LogisticRegressionCV]
@@ -565,7 +568,7 @@ class LogisticRegressionCV(BaseTransformer):
565
568
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
566
569
  expected_dtype = "array"
567
570
  else:
568
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
571
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
569
572
  # We can only infer the output types from the input types if the following two statemetns are true:
570
573
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
571
574
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1228,7 +1231,7 @@ class LogisticRegressionCV(BaseTransformer):
1228
1231
 
1229
1232
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1230
1233
 
1231
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1234
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1232
1235
  outputs: List[BaseFeatureSpec] = []
1233
1236
  if hasattr(self, "predict"):
1234
1237
  # keep mypy happy
@@ -1236,7 +1239,7 @@ class LogisticRegressionCV(BaseTransformer):
1236
1239
  # For classifier, the type of predict is the same as the type of label
1237
1240
  if self._sklearn_object._estimator_type == "classifier":
1238
1241
  # label columns is the desired type for output
1239
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1242
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1240
1243
  # rename the output columns
1241
1244
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1242
1245
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class MultiTaskElasticNet(BaseTransformer):
61
64
  r"""Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer
62
65
  For more details on this class, see [sklearn.linear_model.MultiTaskElasticNet]
@@ -457,7 +460,7 @@ class MultiTaskElasticNet(BaseTransformer):
457
460
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
458
461
  expected_dtype = "array"
459
462
  else:
460
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
463
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
461
464
  # We can only infer the output types from the input types if the following two statemetns are true:
462
465
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
463
466
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1114,7 +1117,7 @@ class MultiTaskElasticNet(BaseTransformer):
1114
1117
 
1115
1118
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1116
1119
 
1117
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1120
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1118
1121
  outputs: List[BaseFeatureSpec] = []
1119
1122
  if hasattr(self, "predict"):
1120
1123
  # keep mypy happy
@@ -1122,7 +1125,7 @@ class MultiTaskElasticNet(BaseTransformer):
1122
1125
  # For classifier, the type of predict is the same as the type of label
1123
1126
  if self._sklearn_object._estimator_type == "classifier":
1124
1127
  # label columns is the desired type for output
1125
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1128
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1126
1129
  # rename the output columns
1127
1130
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1128
1131
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class MultiTaskElasticNetCV(BaseTransformer):
61
64
  r"""Multi-task L1/L2 ElasticNet with built-in cross-validation
62
65
  For more details on this class, see [sklearn.linear_model.MultiTaskElasticNetCV]
@@ -498,7 +501,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
498
501
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
499
502
  expected_dtype = "array"
500
503
  else:
501
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
504
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
502
505
  # We can only infer the output types from the input types if the following two statemetns are true:
503
506
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
504
507
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1155,7 +1158,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
1155
1158
 
1156
1159
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1157
1160
 
1158
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1161
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1159
1162
  outputs: List[BaseFeatureSpec] = []
1160
1163
  if hasattr(self, "predict"):
1161
1164
  # keep mypy happy
@@ -1163,7 +1166,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
1163
1166
  # For classifier, the type of predict is the same as the type of label
1164
1167
  if self._sklearn_object._estimator_type == "classifier":
1165
1168
  # label columns is the desired type for output
1166
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1169
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1167
1170
  # rename the output columns
1168
1171
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1169
1172
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class MultiTaskLasso(BaseTransformer):
61
64
  r"""Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer
62
65
  For more details on this class, see [sklearn.linear_model.MultiTaskLasso]
@@ -449,7 +452,7 @@ class MultiTaskLasso(BaseTransformer):
449
452
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
450
453
  expected_dtype = "array"
451
454
  else:
452
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
455
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
453
456
  # We can only infer the output types from the input types if the following two statemetns are true:
454
457
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
455
458
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1106,7 +1109,7 @@ class MultiTaskLasso(BaseTransformer):
1106
1109
 
1107
1110
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1108
1111
 
1109
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1112
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1110
1113
  outputs: List[BaseFeatureSpec] = []
1111
1114
  if hasattr(self, "predict"):
1112
1115
  # keep mypy happy
@@ -1114,7 +1117,7 @@ class MultiTaskLasso(BaseTransformer):
1114
1117
  # For classifier, the type of predict is the same as the type of label
1115
1118
  if self._sklearn_object._estimator_type == "classifier":
1116
1119
  # label columns is the desired type for output
1117
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1120
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1118
1121
  # rename the output columns
1119
1122
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1120
1123
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class MultiTaskLassoCV(BaseTransformer):
61
64
  r"""Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer
62
65
  For more details on this class, see [sklearn.linear_model.MultiTaskLassoCV]
@@ -484,7 +487,7 @@ class MultiTaskLassoCV(BaseTransformer):
484
487
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
485
488
  expected_dtype = "array"
486
489
  else:
487
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
490
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
488
491
  # We can only infer the output types from the input types if the following two statemetns are true:
489
492
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
490
493
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1141,7 +1144,7 @@ class MultiTaskLassoCV(BaseTransformer):
1141
1144
 
1142
1145
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1143
1146
 
1144
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1147
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1145
1148
  outputs: List[BaseFeatureSpec] = []
1146
1149
  if hasattr(self, "predict"):
1147
1150
  # keep mypy happy
@@ -1149,7 +1152,7 @@ class MultiTaskLassoCV(BaseTransformer):
1149
1152
  # For classifier, the type of predict is the same as the type of label
1150
1153
  if self._sklearn_object._estimator_type == "classifier":
1151
1154
  # label columns is the desired type for output
1152
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1155
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1153
1156
  # rename the output columns
1154
1157
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1155
1158
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class OrthogonalMatchingPursuit(BaseTransformer):
61
64
  r"""Orthogonal Matching Pursuit model (OMP)
62
65
  For more details on this class, see [sklearn.linear_model.OrthogonalMatchingPursuit]
@@ -423,7 +426,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
423
426
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
424
427
  expected_dtype = "array"
425
428
  else:
426
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
429
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
427
430
  # We can only infer the output types from the input types if the following two statemetns are true:
428
431
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
429
432
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1080,7 +1083,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
1080
1083
 
1081
1084
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1082
1085
 
1083
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1086
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1084
1087
  outputs: List[BaseFeatureSpec] = []
1085
1088
  if hasattr(self, "predict"):
1086
1089
  # keep mypy happy
@@ -1088,7 +1091,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
1088
1091
  # For classifier, the type of predict is the same as the type of label
1089
1092
  if self._sklearn_object._estimator_type == "classifier":
1090
1093
  # label columns is the desired type for output
1091
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1094
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1092
1095
  # rename the output columns
1093
1096
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1094
1097
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class PassiveAggressiveClassifier(BaseTransformer):
61
64
  r"""Passive Aggressive Classifier
62
65
  For more details on this class, see [sklearn.linear_model.PassiveAggressiveClassifier]
@@ -506,7 +509,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
506
509
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
507
510
  expected_dtype = "array"
508
511
  else:
509
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
512
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
510
513
  # We can only infer the output types from the input types if the following two statemetns are true:
511
514
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
512
515
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1165,7 +1168,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
1165
1168
 
1166
1169
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1167
1170
 
1168
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1171
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1169
1172
  outputs: List[BaseFeatureSpec] = []
1170
1173
  if hasattr(self, "predict"):
1171
1174
  # keep mypy happy
@@ -1173,7 +1176,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
1173
1176
  # For classifier, the type of predict is the same as the type of label
1174
1177
  if self._sklearn_object._estimator_type == "classifier":
1175
1178
  # label columns is the desired type for output
1176
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1179
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1177
1180
  # rename the output columns
1178
1181
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1179
1182
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class PassiveAggressiveRegressor(BaseTransformer):
61
64
  r"""Passive Aggressive Regressor
62
65
  For more details on this class, see [sklearn.linear_model.PassiveAggressiveRegressor]
@@ -492,7 +495,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
492
495
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
493
496
  expected_dtype = "array"
494
497
  else:
495
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
498
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
496
499
  # We can only infer the output types from the input types if the following two statemetns are true:
497
500
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
498
501
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1149,7 +1152,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
1149
1152
 
1150
1153
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1151
1154
 
1152
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1155
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1153
1156
  outputs: List[BaseFeatureSpec] = []
1154
1157
  if hasattr(self, "predict"):
1155
1158
  # keep mypy happy
@@ -1157,7 +1160,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
1157
1160
  # For classifier, the type of predict is the same as the type of label
1158
1161
  if self._sklearn_object._estimator_type == "classifier":
1159
1162
  # label columns is the desired type for output
1160
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1163
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1161
1164
  # rename the output columns
1162
1165
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1163
1166
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class Perceptron(BaseTransformer):
61
64
  r"""Linear perceptron classifier
62
65
  For more details on this class, see [sklearn.linear_model.Perceptron]
@@ -505,7 +508,7 @@ class Perceptron(BaseTransformer):
505
508
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
506
509
  expected_dtype = "array"
507
510
  else:
508
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
511
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
509
512
  # We can only infer the output types from the input types if the following two statemetns are true:
510
513
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
511
514
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1164,7 +1167,7 @@ class Perceptron(BaseTransformer):
1164
1167
 
1165
1168
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1166
1169
 
1167
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1170
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1168
1171
  outputs: List[BaseFeatureSpec] = []
1169
1172
  if hasattr(self, "predict"):
1170
1173
  # keep mypy happy
@@ -1172,7 +1175,7 @@ class Perceptron(BaseTransformer):
1172
1175
  # For classifier, the type of predict is the same as the type of label
1173
1176
  if self._sklearn_object._estimator_type == "classifier":
1174
1177
  # label columns is the desired type for output
1175
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1178
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1176
1179
  # rename the output columns
1177
1180
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1178
1181
  self._model_signature_dict["predict"] = ModelSignature(
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class PoissonRegressor(BaseTransformer):
61
64
  r"""Generalized Linear Model with a Poisson distribution
62
65
  For more details on this class, see [sklearn.linear_model.PoissonRegressor]
@@ -454,7 +457,7 @@ class PoissonRegressor(BaseTransformer):
454
457
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
455
458
  expected_dtype = "array"
456
459
  else:
457
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
460
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
458
461
  # We can only infer the output types from the input types if the following two statemetns are true:
459
462
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
460
463
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1111,7 +1114,7 @@ class PoissonRegressor(BaseTransformer):
1111
1114
 
1112
1115
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1113
1116
 
1114
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1117
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1115
1118
  outputs: List[BaseFeatureSpec] = []
1116
1119
  if hasattr(self, "predict"):
1117
1120
  # keep mypy happy
@@ -1119,7 +1122,7 @@ class PoissonRegressor(BaseTransformer):
1119
1122
  # For classifier, the type of predict is the same as the type of label
1120
1123
  if self._sklearn_object._estimator_type == "classifier":
1121
1124
  # label columns is the desired type for output
1122
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1125
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1123
1126
  # rename the output columns
1124
1127
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1125
1128
  self._model_signature_dict["predict"] = ModelSignature(