snowflake-ml-python 1.7.3-py3-none-any.whl → 1.7.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. snowflake/cortex/_complete.py +19 -0
  2. snowflake/ml/_internal/platform_capabilities.py +87 -0
  3. snowflake/ml/dataset/dataset.py +0 -1
  4. snowflake/ml/fileset/fileset.py +6 -0
  5. snowflake/ml/jobs/__init__.py +21 -0
  6. snowflake/ml/jobs/_utils/constants.py +51 -0
  7. snowflake/ml/jobs/_utils/payload_utils.py +352 -0
  8. snowflake/ml/jobs/_utils/spec_utils.py +298 -0
  9. snowflake/ml/jobs/_utils/types.py +39 -0
  10. snowflake/ml/jobs/decorators.py +91 -0
  11. snowflake/ml/jobs/job.py +113 -0
  12. snowflake/ml/jobs/manager.py +298 -0
  13. snowflake/ml/model/_client/ops/model_ops.py +11 -2
  14. snowflake/ml/model/_client/ops/service_ops.py +1 -11
  15. snowflake/ml/model/_client/sql/service.py +13 -6
  16. snowflake/ml/model/_packager/model_handlers/_utils.py +12 -3
  17. snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
  18. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +1 -0
  19. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
  20. snowflake/ml/model/_signatures/base_handler.py +1 -2
  21. snowflake/ml/model/_signatures/builtins_handler.py +2 -2
  22. snowflake/ml/model/_signatures/numpy_handler.py +6 -7
  23. snowflake/ml/model/_signatures/pandas_handler.py +2 -2
  24. snowflake/ml/model/_signatures/pytorch_handler.py +2 -5
  25. snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
  26. snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
  27. snowflake/ml/model/model_signature.py +17 -4
  28. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
  29. snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
  30. snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
  31. snowflake/ml/modeling/cluster/birch.py +6 -3
  32. snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
  33. snowflake/ml/modeling/cluster/dbscan.py +6 -3
  34. snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
  35. snowflake/ml/modeling/cluster/k_means.py +6 -3
  36. snowflake/ml/modeling/cluster/mean_shift.py +6 -3
  37. snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
  38. snowflake/ml/modeling/cluster/optics.py +6 -3
  39. snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
  40. snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
  41. snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
  42. snowflake/ml/modeling/compose/column_transformer.py +6 -3
  43. snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
  44. snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
  45. snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
  46. snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
  47. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
  48. snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
  49. snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
  50. snowflake/ml/modeling/covariance/oas.py +6 -3
  51. snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
  52. snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
  53. snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
  54. snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
  55. snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
  56. snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
  57. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
  58. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
  59. snowflake/ml/modeling/decomposition/pca.py +6 -3
  60. snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
  61. snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
  62. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
  63. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
  64. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
  65. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
  66. snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
  67. snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
  68. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
  69. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
  70. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
  71. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
  72. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
  73. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
  74. snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
  75. snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
  76. snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
  77. snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
  78. snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
  79. snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
  80. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
  81. snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
  82. snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
  83. snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
  84. snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
  85. snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
  86. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
  87. snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
  88. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
  89. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
  90. snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
  91. snowflake/ml/modeling/impute/knn_imputer.py +6 -3
  92. snowflake/ml/modeling/impute/missing_indicator.py +6 -3
  93. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
  94. snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
  95. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
  96. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
  97. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
  98. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
  99. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
  100. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
  101. snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
  102. snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
  103. snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
  104. snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
  105. snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
  106. snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
  107. snowflake/ml/modeling/linear_model/lars.py +6 -3
  108. snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
  109. snowflake/ml/modeling/linear_model/lasso.py +6 -3
  110. snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
  111. snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
  112. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
  113. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
  114. snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
  115. snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
  116. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
  117. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
  118. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
  119. snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
  120. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
  121. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
  122. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
  123. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
  124. snowflake/ml/modeling/linear_model/perceptron.py +6 -3
  125. snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
  126. snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
  127. snowflake/ml/modeling/linear_model/ridge.py +6 -3
  128. snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
  129. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
  130. snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
  131. snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
  132. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
  133. snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
  134. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
  135. snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
  136. snowflake/ml/modeling/manifold/isomap.py +6 -3
  137. snowflake/ml/modeling/manifold/mds.py +6 -3
  138. snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
  139. snowflake/ml/modeling/manifold/tsne.py +6 -3
  140. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
  141. snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
  142. snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
  143. snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
  144. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
  145. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
  146. snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
  147. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
  148. snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
  149. snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
  150. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
  151. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
  152. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
  153. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
  154. snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
  155. snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
  156. snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
  157. snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
  158. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
  159. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
  160. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
  161. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
  162. snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
  163. snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
  164. snowflake/ml/modeling/pipeline/pipeline.py +10 -2
  165. snowflake/ml/modeling/preprocessing/polynomial_features.py +6 -3
  166. snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
  167. snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
  168. snowflake/ml/modeling/svm/linear_svc.py +6 -3
  169. snowflake/ml/modeling/svm/linear_svr.py +6 -3
  170. snowflake/ml/modeling/svm/nu_svc.py +6 -3
  171. snowflake/ml/modeling/svm/nu_svr.py +6 -3
  172. snowflake/ml/modeling/svm/svc.py +6 -3
  173. snowflake/ml/modeling/svm/svr.py +6 -3
  174. snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
  175. snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
  176. snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
  177. snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
  178. snowflake/ml/modeling/xgboost/xgb_classifier.py +6 -3
  179. snowflake/ml/modeling/xgboost/xgb_regressor.py +6 -3
  180. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +6 -3
  181. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +6 -3
  182. snowflake/ml/version.py +1 -1
  183. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/METADATA +29 -14
  184. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/RECORD +187 -178
  185. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/LICENSE.txt +0 -0
  186. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/WHEEL +0 -0
  187. {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.4.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/svm/linear_svc.py CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class LinearSVC(BaseTransformer):
     r"""Linear Support Vector Classification
     For more details on this class, see [sklearn.svm.LinearSVC]
@@ -507,7 +510,7 @@ class LinearSVC(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1166,7 +1169,7 @@ class LinearSVC(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1174,7 +1177,7 @@ class LinearSVC(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
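
Every modeling file that follows repeats this same five-hunk pattern: signature inference is now fed at most INFER_SIGNATURE_MAX_ROWS = 100 rows through the new _truncate_data helper imported from snowflake.ml.model.model_signature (item 27 in the manifest, +17 -4). The helper itself is not shown in this diff; below is a minimal sketch of what it could look like, assuming it simply caps rows on either a pandas or Snowpark DataFrame:

    from typing import Union

    import pandas as pd
    from snowflake.snowpark import DataFrame

    # Illustrative sketch only -- the real _truncate_data ships in
    # snowflake/ml/model/model_signature.py and is not shown in this diff.
    def _truncate_data(data: Union[DataFrame, pd.DataFrame], max_rows: int = 100) -> Union[DataFrame, pd.DataFrame]:
        """Cap the number of rows fed to signature inference."""
        if isinstance(data, pd.DataFrame):
            # pandas: positional slice of the first max_rows rows.
            return data.head(max_rows)
        # Snowpark: limit() is folded into the generated SQL, so at most
        # max_rows rows ever leave the warehouse.
        return data.limit(max_rows)

Under that assumption the benefit is largest for Snowpark input: the old calls scanned dataset[self.input_cols] in full just to infer column types, while the truncated call touches at most 100 rows.
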
snowflake/ml/modeling/svm/linear_svr.py CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class LinearSVR(BaseTransformer):
     r"""Linear Support Vector Regression
     For more details on this class, see [sklearn.svm.LinearSVR]
@@ -476,7 +479,7 @@ class LinearSVR(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1133,7 +1136,7 @@ class LinearSVR(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1141,7 +1144,7 @@ class LinearSVR(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/svm/nu_svc.py CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class NuSVC(BaseTransformer):
     r"""Nu-Support Vector Classification
     For more details on this class, see [sklearn.svm.NuSVC]
@@ -506,7 +509,7 @@ class NuSVC(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1169,7 +1172,7 @@ class NuSVC(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1177,7 +1180,7 @@ class NuSVC(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/svm/nu_svr.py CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class NuSVR(BaseTransformer):
     r"""Nu Support Vector Regression
     For more details on this class, see [sklearn.svm.NuSVR]
@@ -467,7 +470,7 @@ class NuSVR(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1124,7 +1127,7 @@ class NuSVR(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1132,7 +1135,7 @@ class NuSVR(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/svm/svc.py CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class SVC(BaseTransformer):
     r"""C-Support Vector Classification
     For more details on this class, see [sklearn.svm.SVC]
@@ -511,7 +514,7 @@ class SVC(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1174,7 +1177,7 @@ class SVC(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1182,7 +1185,7 @@ class SVC(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/svm/svr.py CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class SVR(BaseTransformer):
     r"""Epsilon-Support Vector Regression
     For more details on this class, see [sklearn.svm.SVR]
@@ -470,7 +473,7 @@ class SVR(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1127,7 +1130,7 @@ class SVR(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1135,7 +1138,7 @@ class SVR(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/tree/decision_tree_classifier.py CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.tree".replace("sklearn."
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class DecisionTreeClassifier(BaseTransformer):
     r"""A decision tree classifier
     For more details on this class, see [sklearn.tree.DecisionTreeClassifier]
@@ -554,7 +557,7 @@ class DecisionTreeClassifier(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1215,7 +1218,7 @@ class DecisionTreeClassifier(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1223,7 +1226,7 @@ class DecisionTreeClassifier(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/tree/decision_tree_regressor.py CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.tree".replace("sklearn."
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class DecisionTreeRegressor(BaseTransformer):
     r"""A decision tree regressor
     For more details on this class, see [sklearn.tree.DecisionTreeRegressor]
@@ -533,7 +536,7 @@ class DecisionTreeRegressor(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1190,7 +1193,7 @@ class DecisionTreeRegressor(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1198,7 +1201,7 @@ class DecisionTreeRegressor(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/tree/extra_tree_classifier.py CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.tree".replace("sklearn."
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class ExtraTreeClassifier(BaseTransformer):
     r"""An extremely randomized tree classifier
     For more details on this class, see [sklearn.tree.ExtraTreeClassifier]
@@ -546,7 +549,7 @@ class ExtraTreeClassifier(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1207,7 +1210,7 @@ class ExtraTreeClassifier(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1215,7 +1218,7 @@ class ExtraTreeClassifier(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/tree/extra_tree_regressor.py CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.tree".replace("sklearn."
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class ExtraTreeRegressor(BaseTransformer):
     r"""An extremely randomized tree regressor
     For more details on this class, see [sklearn.tree.ExtraTreeRegressor]
@@ -525,7 +528,7 @@ class ExtraTreeRegressor(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1182,7 +1185,7 @@ class ExtraTreeRegressor(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1190,7 +1193,7 @@ class ExtraTreeRegressor(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/xgboost/xgb_classifier.py CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "xgboost".replace("sklearn.", "")
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class XGBClassifier(BaseTransformer):
     r"""Implementation of the scikit-learn API for XGBoost classification
     For more details on this class, see [xgboost.XGBClassifier]
@@ -701,7 +704,7 @@ class XGBClassifier(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1362,7 +1365,7 @@ class XGBClassifier(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1370,7 +1373,7 @@ class XGBClassifier(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/xgboost/xgb_regressor.py CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "xgboost".replace("sklearn.", "")
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class XGBRegressor(BaseTransformer):
     r"""Implementation of the scikit-learn API for XGBoost regression
     For more details on this class, see [xgboost.XGBRegressor]
@@ -702,7 +705,7 @@ class XGBRegressor(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1359,7 +1362,7 @@ class XGBRegressor(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1367,7 +1370,7 @@ class XGBRegressor(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/xgboost/xgbrf_classifier.py CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "xgboost".replace("sklearn.", "")
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class XGBRFClassifier(BaseTransformer):
     r"""scikit-learn API for XGBoost random forest classification
     For more details on this class, see [xgboost.XGBRFClassifier]
@@ -707,7 +710,7 @@ class XGBRFClassifier(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1368,7 +1371,7 @@ class XGBRFClassifier(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1376,7 +1379,7 @@ class XGBRFClassifier(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/xgboost/xgbrf_regressor.py CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "xgboost".replace("sklearn.", "")
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class XGBRFRegressor(BaseTransformer):
     r"""scikit-learn API for XGBoost random forest regression
     For more details on this class, see [xgboost.XGBRFRegressor]
@@ -707,7 +710,7 @@ class XGBRFRegressor(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1364,7 +1367,7 @@ class XGBRFRegressor(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1372,7 +1375,7 @@ class XGBRFRegressor(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/version.py CHANGED
@@ -1 +1 @@
-VERSION="1.7.3"
+VERSION="1.7.4"
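
Beyond the signature-inference truncation and the version bump, the manifest's largest addition is the new snowflake.ml.jobs package (items 5 through 12). This diff shows only line counts, not the API, so the following usage sketch is an assumption based on the decorator-based preview interface Snowflake documents for ML Jobs; the compute pool, stage, and table names are placeholders:

    from snowflake.ml.jobs import remote

    # Assumed preview API: the decorator packages the function body as a stage
    # payload (payload_utils.py), builds a service spec (spec_utils.py), and
    # runs it on a Snowpark Container Services compute pool (manager.py).
    @remote("MY_COMPUTE_POOL", stage_name="payload_stage")
    def train(table_name: str) -> None:
        # Executes remotely in the container, not in the local process.
        print(f"training on {table_name}")

    job = train("MY_DB.MY_SCHEMA.TRAIN_DATA")  # returns an MLJob handle
    job.wait()                                 # block until the remote run completes
    print(job.get_logs())                      # fetch the container logs

If the exports differ in this early build, jobs/__init__.py (+21 lines in the manifest) is where to check what this release actually exposes.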