snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. snowflake/cortex/__init__.py +16 -8
  2. snowflake/cortex/_classify_text.py +12 -1
  3. snowflake/cortex/_complete.py +101 -13
  4. snowflake/cortex/_embed_text_1024.py +9 -2
  5. snowflake/cortex/_embed_text_768.py +9 -2
  6. snowflake/cortex/_extract_answer.py +9 -2
  7. snowflake/cortex/_sentiment.py +9 -2
  8. snowflake/cortex/_summarize.py +9 -2
  9. snowflake/cortex/_translate.py +9 -2
  10. snowflake/ml/_internal/env_utils.py +7 -52
  11. snowflake/ml/_internal/platform_capabilities.py +87 -0
  12. snowflake/ml/_internal/utils/identifier.py +4 -2
  13. snowflake/ml/data/__init__.py +3 -0
  14. snowflake/ml/data/_internal/arrow_ingestor.py +4 -4
  15. snowflake/ml/data/data_connector.py +53 -11
  16. snowflake/ml/data/data_ingestor.py +2 -1
  17. snowflake/ml/data/torch_utils.py +18 -5
  18. snowflake/ml/dataset/dataset.py +0 -1
  19. snowflake/ml/feature_store/examples/example_helper.py +2 -1
  20. snowflake/ml/fileset/fileset.py +24 -18
  21. snowflake/ml/jobs/__init__.py +21 -0
  22. snowflake/ml/jobs/_utils/constants.py +51 -0
  23. snowflake/ml/jobs/_utils/payload_utils.py +352 -0
  24. snowflake/ml/jobs/_utils/spec_utils.py +298 -0
  25. snowflake/ml/jobs/_utils/types.py +39 -0
  26. snowflake/ml/jobs/decorators.py +91 -0
  27. snowflake/ml/jobs/job.py +113 -0
  28. snowflake/ml/jobs/manager.py +298 -0
  29. snowflake/ml/model/_client/model/model_version_impl.py +5 -3
  30. snowflake/ml/model/_client/ops/model_ops.py +13 -8
  31. snowflake/ml/model/_client/ops/service_ops.py +1 -11
  32. snowflake/ml/model/_client/sql/model_version.py +11 -0
  33. snowflake/ml/model/_client/sql/service.py +13 -6
  34. snowflake/ml/model/_model_composer/model_composer.py +8 -3
  35. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -1
  36. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
  37. snowflake/ml/model/_model_composer/model_method/constants.py +1 -0
  38. snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -0
  39. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +1 -1
  40. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +1 -1
  41. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
  42. snowflake/ml/model/_model_composer/model_method/model_method.py +9 -1
  43. snowflake/ml/model/_model_composer/model_user_file/model_user_file.py +27 -0
  44. snowflake/ml/model/_packager/model_handlers/_utils.py +39 -5
  45. snowflake/ml/model/_packager/model_handlers/catboost.py +3 -3
  46. snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
  47. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +6 -1
  48. snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -3
  49. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +55 -20
  50. snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -10
  51. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +66 -28
  52. snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -17
  53. snowflake/ml/model/_packager/model_handlers/xgboost.py +3 -3
  54. snowflake/ml/model/_packager/model_meta/model_meta.py +3 -0
  55. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
  56. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
  57. snowflake/ml/model/_packager/model_task/model_task_utils.py +3 -2
  58. snowflake/ml/model/_signatures/base_handler.py +1 -2
  59. snowflake/ml/model/_signatures/builtins_handler.py +2 -2
  60. snowflake/ml/model/_signatures/numpy_handler.py +6 -7
  61. snowflake/ml/model/_signatures/pandas_handler.py +3 -3
  62. snowflake/ml/model/_signatures/pytorch_handler.py +2 -5
  63. snowflake/ml/model/_signatures/snowpark_handler.py +11 -5
  64. snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
  65. snowflake/ml/model/model_signature.py +17 -4
  66. snowflake/ml/model/type_hints.py +1 -0
  67. snowflake/ml/modeling/_internal/model_trainer_builder.py +0 -8
  68. snowflake/ml/modeling/_internal/model_transformer_builder.py +0 -13
  69. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
  70. snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
  71. snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
  72. snowflake/ml/modeling/cluster/birch.py +6 -3
  73. snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
  74. snowflake/ml/modeling/cluster/dbscan.py +6 -3
  75. snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
  76. snowflake/ml/modeling/cluster/k_means.py +6 -3
  77. snowflake/ml/modeling/cluster/mean_shift.py +6 -3
  78. snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
  79. snowflake/ml/modeling/cluster/optics.py +6 -3
  80. snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
  81. snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
  82. snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
  83. snowflake/ml/modeling/compose/column_transformer.py +6 -3
  84. snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
  85. snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
  86. snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
  87. snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
  88. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
  89. snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
  90. snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
  91. snowflake/ml/modeling/covariance/oas.py +6 -3
  92. snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
  93. snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
  94. snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
  95. snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
  96. snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
  97. snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
  98. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
  99. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
  100. snowflake/ml/modeling/decomposition/pca.py +6 -3
  101. snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
  102. snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
  103. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
  104. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
  105. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
  106. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
  107. snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
  108. snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
  109. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
  110. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
  111. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
  112. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
  113. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
  114. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
  115. snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
  116. snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
  117. snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
  118. snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
  119. snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
  120. snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
  121. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
  122. snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
  123. snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
  124. snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
  125. snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
  126. snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
  127. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
  128. snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
  129. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
  130. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
  131. snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
  132. snowflake/ml/modeling/impute/knn_imputer.py +6 -3
  133. snowflake/ml/modeling/impute/missing_indicator.py +6 -3
  134. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
  135. snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
  136. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
  137. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
  138. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
  139. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
  140. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
  141. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
  142. snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
  143. snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
  144. snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
  145. snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
  146. snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
  147. snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
  148. snowflake/ml/modeling/linear_model/lars.py +6 -3
  149. snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
  150. snowflake/ml/modeling/linear_model/lasso.py +6 -3
  151. snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
  152. snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
  153. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
  154. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
  155. snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
  156. snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
  157. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
  158. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
  159. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
  160. snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
  161. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
  162. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
  163. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
  164. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
  165. snowflake/ml/modeling/linear_model/perceptron.py +6 -3
  166. snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
  167. snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
  168. snowflake/ml/modeling/linear_model/ridge.py +6 -3
  169. snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
  170. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
  171. snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
  172. snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
  173. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
  174. snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
  175. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
  176. snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
  177. snowflake/ml/modeling/manifold/isomap.py +6 -3
  178. snowflake/ml/modeling/manifold/mds.py +6 -3
  179. snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
  180. snowflake/ml/modeling/manifold/tsne.py +6 -3
  181. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
  182. snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
  183. snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
  184. snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
  185. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
  186. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
  187. snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
  188. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
  189. snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
  190. snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
  191. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
  192. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
  193. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
  194. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
  195. snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
  196. snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
  197. snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
  198. snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
  199. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
  200. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
  201. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
  202. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
  203. snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
  204. snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
  205. snowflake/ml/modeling/pipeline/pipeline.py +16 -178
  206. snowflake/ml/modeling/preprocessing/polynomial_features.py +6 -3
  207. snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
  208. snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
  209. snowflake/ml/modeling/svm/linear_svc.py +6 -3
  210. snowflake/ml/modeling/svm/linear_svr.py +6 -3
  211. snowflake/ml/modeling/svm/nu_svc.py +6 -3
  212. snowflake/ml/modeling/svm/nu_svr.py +6 -3
  213. snowflake/ml/modeling/svm/svc.py +6 -3
  214. snowflake/ml/modeling/svm/svr.py +6 -3
  215. snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
  216. snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
  217. snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
  218. snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
  219. snowflake/ml/modeling/xgboost/xgb_classifier.py +167 -91
  220. snowflake/ml/modeling/xgboost/xgb_regressor.py +166 -88
  221. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +166 -88
  222. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +166 -88
  223. snowflake/ml/monitoring/_client/model_monitor_sql_client.py +4 -4
  224. snowflake/ml/registry/_manager/model_manager.py +70 -33
  225. snowflake/ml/registry/registry.py +41 -22
  226. snowflake/ml/version.py +1 -1
  227. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/METADATA +63 -19
  228. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/RECORD +231 -226
  229. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/WHEEL +1 -1
  230. snowflake/ml/_internal/utils/retryable_http.py +0 -39
  231. snowflake/ml/fileset/parquet_parser.py +0 -170
  232. snowflake/ml/fileset/tf_dataset.py +0 -88
  233. snowflake/ml/fileset/torch_datapipe.py +0 -57
  234. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +0 -151
  235. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_trainer.py +0 -66
  236. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/LICENSE.txt +0 -0
  237. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/top_level.txt +0 -0
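
The headline addition in this release is the new snowflake.ml.jobs module (items 21-28 above). The sketch below is a hedged illustration of the decorator-based entry point implied by decorators.py and job.py; the compute pool name, stage name, and parameter names are assumptions, not confirmed API.

  # Hedged sketch of the snowflake.ml.jobs preview API introduced here.
  # "MY_COMPUTE_POOL" and "payload_stage" are placeholder names; the exact
  # decorator parameters are assumed from the new file names above.
  from snowflake.ml.jobs import remote

  @remote("MY_COMPUTE_POOL", stage_name="payload_stage")
  def train_model(table_name: str) -> None:
      # Body executes headless on a Snowpark Container Services compute pool.
      print(f"training on {table_name}")

  job = train_model("MY_DB.MY_SCHEMA.TRAIN_DATA")  # submits the job, returns a handle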
snowflake/ml/modeling/pipeline/pipeline.py

@@ -20,7 +20,11 @@ from snowflake.ml._internal.exceptions import error_codes, exceptions
  from snowflake.ml._internal.lineage import lineage_utils
  from snowflake.ml._internal.utils import snowpark_dataframe_utils, temp_file_utils
  from snowflake.ml.data import data_source
- from snowflake.ml.model.model_signature import ModelSignature, _infer_signature
+ from snowflake.ml.model.model_signature import (
+     ModelSignature,
+     _infer_signature,
+     _truncate_data,
+ )
  from snowflake.ml.modeling._internal.model_transformer_builder import (
      ModelTransformerBuilder,
  )
@@ -30,7 +34,8 @@ from snowflake.snowpark._internal import utils as snowpark_utils

  _PROJECT = "ModelDevelopment"
  _SUBPROJECT = "Framework"
- IN_ML_RUNTIME_ENV_VAR = "IN_SPCS_ML_RUNTIME"
+
+ INFER_SIGNATURE_MAX_ROWS = 100


  def _final_step_has(attr: str) -> Callable[..., bool]:
@@ -432,10 +437,7 @@ class Pipeline(base.BaseTransformer):
          data_sources = [data_source.DataFrameInfo(dataset.queries["queries"][-1])]
          lineage_utils.set_data_sources(self, data_sources)

-         if self._can_be_trained_in_ml_runtime(dataset):
-             self._fit_ml_runtime(dataset)
-
-         elif squash and isinstance(dataset, snowpark.DataFrame):
+         if squash and isinstance(dataset, snowpark.DataFrame):
              session = dataset._session
              assert session is not None
              self._fit_snowpark_dataframe_within_one_sproc(session=session, dataset=dataset)
@@ -606,25 +608,7 @@ class Pipeline(base.BaseTransformer):
          Returns:
              Output dataset.
          """
-         if os.environ.get(IN_ML_RUNTIME_ENV_VAR) and self._sklearn_object is not None:
-             expected_output_cols = self._infer_output_cols()
-             handler = ModelTransformerBuilder.build(
-                 dataset=dataset,
-                 estimator=self._sklearn_object,
-                 class_name="Pipeline",
-                 subproject="",
-                 autogenerated=False,
-             )
-             return handler.batch_inference(
-                 inference_method="predict",
-                 input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
-                 expected_output_cols=expected_output_cols,
-                 session=dataset._session,
-                 dependencies=self._deps,
-             )
-
-         else:
-             return self._invoke_estimator_func("predict", dataset)
+         return self._invoke_estimator_func("predict", dataset)

      @metaestimators.available_if(_final_step_has("score_samples")) # type: ignore[misc]
      @telemetry.send_api_usage_telemetry(
@@ -642,32 +626,8 @@ class Pipeline(base.BaseTransformer):

          Returns:
              Output dataset.
-
-         Raises:
-             ValueError: An sklearn object has not been fit before calling this function
          """
-
-         if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-             if self._sklearn_object is None:
-                 raise ValueError("Model must be fit before inference.")
-
-             expected_output_cols = self._get_output_column_names("score_samples")
-             handler = ModelTransformerBuilder.build(
-                 dataset=dataset,
-                 estimator=self._sklearn_object,
-                 class_name="Pipeline",
-                 subproject="",
-                 autogenerated=False,
-             )
-             return handler.batch_inference(
-                 inference_method="score_samples",
-                 input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
-                 expected_output_cols=expected_output_cols,
-                 session=dataset._session,
-                 dependencies=self._deps,
-             )
-         else:
-             return self._invoke_estimator_func("score_samples", dataset)
+         return self._invoke_estimator_func("score_samples", dataset)

      @metaestimators.available_if(_final_step_has("predict_proba")) # type: ignore[misc]
      @telemetry.send_api_usage_telemetry(
@@ -685,32 +645,8 @@ class Pipeline(base.BaseTransformer):

          Returns:
              Output dataset.
-
-         Raises:
-             ValueError: An sklearn object has not been fit before calling this function
          """
-
-         if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-             if self._sklearn_object is None:
-                 raise ValueError("Model must be fit before inference.")
-             expected_output_cols = self._get_output_column_names("predict_proba")
-
-             handler = ModelTransformerBuilder.build(
-                 dataset=dataset,
-                 estimator=self._sklearn_object,
-                 class_name="Pipeline",
-                 subproject="",
-                 autogenerated=False,
-             )
-             return handler.batch_inference(
-                 inference_method="predict_proba",
-                 input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
-                 expected_output_cols=expected_output_cols,
-                 session=dataset._session,
-                 dependencies=self._deps,
-             )
-         else:
-             return self._invoke_estimator_func("predict_proba", dataset)
+         return self._invoke_estimator_func("predict_proba", dataset)

      @metaestimators.available_if(_final_step_has("predict_log_proba")) # type: ignore[misc]
      @telemetry.send_api_usage_telemetry(
@@ -729,31 +665,8 @@ class Pipeline(base.BaseTransformer):

          Returns:
              Output dataset.
-
-         Raises:
-             ValueError: An sklearn object has not been fit before calling this function
          """
-         if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-             if self._sklearn_object is None:
-                 raise ValueError("Model must be fit before inference.")
-
-             expected_output_cols = self._get_output_column_names("predict_log_proba")
-             handler = ModelTransformerBuilder.build(
-                 dataset=dataset,
-                 estimator=self._sklearn_object,
-                 class_name="Pipeline",
-                 subproject="",
-                 autogenerated=False,
-             )
-             return handler.batch_inference(
-                 inference_method="predict_log_proba",
-                 input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
-                 expected_output_cols=expected_output_cols,
-                 session=dataset._session,
-                 dependencies=self._deps,
-             )
-         else:
-             return self._invoke_estimator_func("predict_log_proba", dataset)
+         return self._invoke_estimator_func("predict_log_proba", dataset)

      @metaestimators.available_if(_final_step_has("score")) # type: ignore[misc]
      @telemetry.send_api_usage_telemetry(
@@ -769,30 +682,9 @@ class Pipeline(base.BaseTransformer):

          Returns:
              Output dataset.
-
-         Raises:
-             ValueError: An sklearn object has not been fit before calling this function
          """

-         if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-             if self._sklearn_object is None:
-                 raise ValueError("Model must be fit before scoreing.")
-             handler = ModelTransformerBuilder.build(
-                 dataset=dataset,
-                 estimator=self._sklearn_object,
-                 class_name="Pipeline",
-                 subproject="",
-                 autogenerated=False,
-             )
-             return handler.score(
-                 input_cols=self._infer_input_cols(),
-                 label_cols=self._get_label_cols(),
-                 session=dataset._session,
-                 dependencies=self._deps,
-                 score_sproc_imports=[],
-             )
-         else:
-             return self._invoke_estimator_func("score", dataset)
+         return self._invoke_estimator_func("score", dataset)

      def _invoke_estimator_func(
          self, func_name: str, dataset: Union[snowpark.DataFrame, pd.DataFrame]
@@ -882,39 +774,6 @@ class Pipeline(base.BaseTransformer):

          return ct

-     def _fit_ml_runtime(self, dataset: snowpark.DataFrame) -> None:
-         """Train the pipeline in the ML Runtime.
-
-         Args:
-             dataset: The training Snowpark dataframe
-
-         Raises:
-             ModuleNotFoundError: The ML Runtime Client is not installed.
-         """
-         try:
-             from snowflake.ml.runtime import MLRuntimeClient
-         except ModuleNotFoundError as e:
-             # The snowflake.ml.runtime module should always be present when
-             # the env var IN_SPCS_ML_RUNTIME is present.
-             raise ModuleNotFoundError("ML Runtime Python Client is not installed.") from e
-
-         client = MLRuntimeClient()
-         ml_runtime_compatible_pipeline = self._create_unfitted_sklearn_object()
-
-         label_cols = self._get_label_cols()
-         all_df_cols = dataset.columns
-         input_cols = [col for col in all_df_cols if col not in label_cols]
-
-         trained_pipeline = client.train(
-             estimator=ml_runtime_compatible_pipeline,
-             dataset=dataset,
-             input_cols=input_cols,
-             label_cols=label_cols,
-             sample_weight_col=self.sample_weight_col,
-         )
-
-         self._sklearn_object = trained_pipeline
-
      def _get_label_cols(self) -> List[str]:
          """Util function to get the label columns from the pipeline.
          The label column is only present in the estimator
@@ -929,28 +788,6 @@ class Pipeline(base.BaseTransformer):

          return label_cols

-     def _can_be_trained_in_ml_runtime(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> bool:
-         """A utility function to determine if the pipeline cam be pushed down to the ML Runtime for training.
-         Currently, this is true if:
-         - The training dataset is a snowpark dataframe,
-         - The IN_SPCS_ML_RUNTIME environment is present and
-         - The pipeline can be converted to an sklearn pipeline.
-
-         Args:
-             dataset: The training dataset
-
-         Returns:
-             True if the dataset can be fit in the ml runtime, else false.
-
-         """
-         if not isinstance(dataset, snowpark.DataFrame):
-             return False
-
-         if not os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-             return False
-
-         return self._is_convertible_to_sklearn
-
      @staticmethod
      def _wrap_transformer_in_column_transformer(
          transformer_name: str, transformer: base.BaseTransformer
@@ -1054,7 +891,9 @@ class Pipeline(base.BaseTransformer):
          self._model_signature_dict = dict()

          input_columns = self._get_sanitized_list_of_columns(dataset.columns)
-         inputs_signature = _infer_signature(dataset[input_columns], "input", use_snowflake_identifiers=True)
+         inputs_signature = _infer_signature(
+             _truncate_data(dataset[input_columns], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True
+         )

          estimator_step = self._get_estimator()
          if estimator_step:
@@ -1124,7 +963,6 @@ class Pipeline(base.BaseTransformer):

          telemetry_data = {
              "pipeline_is_convertible_to_sklearn": self._is_convertible_to_sklearn,
-             "in_spcs_ml_runtime": bool(os.environ.get(IN_ML_RUNTIME_ENV_VAR)),
          }
          telemetry.send_custom_usage(
              project=_PROJECT,
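
The recurring change in pipeline.py above, and in each estimator file below, is that signature inference now samples at most INFER_SIGNATURE_MAX_ROWS = 100 rows instead of scanning the full dataset. A minimal sketch of what _truncate_data presumably does (the real helper lives in snowflake.ml.model.model_signature; this standalone reimplementation is an assumption for illustration only):

  import pandas as pd

  INFER_SIGNATURE_MAX_ROWS = 100  # constant introduced in this release

  def _truncate_data_sketch(data, max_rows: int = INFER_SIGNATURE_MAX_ROWS):
      # Cap the sample used for signature inference (assumed behavior).
      if isinstance(data, pd.DataFrame):
          return data.head(max_rows)
      return data.limit(max_rows)  # Snowpark DataFrame path; limit() is real API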
snowflake/ml/modeling/preprocessing/polynomial_features.py

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
      FeatureSpec,
      ModelSignature,
      _infer_signature,
+     _truncate_data,
      _rename_signature_with_snowflake_identifiers,
  )

@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.preprocessing".replace("

  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]

+ INFER_SIGNATURE_MAX_ROWS = 100
+
  class PolynomialFeatures(BaseTransformer):
      r"""Generate polynomial and interaction features
      For more details on this class, see [sklearn.preprocessing.PolynomialFeatures]
@@ -429,7 +432,7 @@ class PolynomialFeatures(BaseTransformer):
          elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
              expected_dtype = "array"
          else:
-             output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+             output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
              # We can only infer the output types from the input types if the following two statemetns are true:
              # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
              # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1086,7 +1089,7 @@ class PolynomialFeatures(BaseTransformer):

          PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]

-         inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+         inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
          outputs: List[BaseFeatureSpec] = []
          if hasattr(self, "predict"):
              # keep mypy happy
@@ -1094,7 +1097,7 @@ class PolynomialFeatures(BaseTransformer):
              # For classifier, the type of predict is the same as the type of label
              if self._sklearn_object._estimator_type == "classifier":
                  # label columns is the desired type for output
-                 outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                 outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                  # rename the output columns
                  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
              self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/semi_supervised/label_propagation.py

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
      FeatureSpec,
      ModelSignature,
      _infer_signature,
+     _truncate_data,
      _rename_signature_with_snowflake_identifiers,
  )

@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.semi_supervised".replace

  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]

+ INFER_SIGNATURE_MAX_ROWS = 100
+
  class LabelPropagation(BaseTransformer):
      r"""Label Propagation classifier
      For more details on this class, see [sklearn.semi_supervised.LabelPropagation]
@@ -433,7 +436,7 @@ class LabelPropagation(BaseTransformer):
          elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
              expected_dtype = "array"
          else:
-             output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+             output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
              # We can only infer the output types from the input types if the following two statemetns are true:
              # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
              # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1094,7 +1097,7 @@ class LabelPropagation(BaseTransformer):

          PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]

-         inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+         inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
          outputs: List[BaseFeatureSpec] = []
          if hasattr(self, "predict"):
              # keep mypy happy
@@ -1102,7 +1105,7 @@ class LabelPropagation(BaseTransformer):
              # For classifier, the type of predict is the same as the type of label
              if self._sklearn_object._estimator_type == "classifier":
                  # label columns is the desired type for output
-                 outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                 outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                  # rename the output columns
                  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
              self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/semi_supervised/label_spreading.py

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
      FeatureSpec,
      ModelSignature,
      _infer_signature,
+     _truncate_data,
      _rename_signature_with_snowflake_identifiers,
  )

@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.semi_supervised".replace

  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]

+ INFER_SIGNATURE_MAX_ROWS = 100
+
  class LabelSpreading(BaseTransformer):
      r"""LabelSpreading model for semi-supervised learning
      For more details on this class, see [sklearn.semi_supervised.LabelSpreading]
@@ -442,7 +445,7 @@ class LabelSpreading(BaseTransformer):
          elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
              expected_dtype = "array"
          else:
-             output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+             output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
              # We can only infer the output types from the input types if the following two statemetns are true:
              # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
              # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1103,7 +1106,7 @@ class LabelSpreading(BaseTransformer):

          PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]

-         inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+         inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
          outputs: List[BaseFeatureSpec] = []
          if hasattr(self, "predict"):
              # keep mypy happy
@@ -1111,7 +1114,7 @@ class LabelSpreading(BaseTransformer):
              # For classifier, the type of predict is the same as the type of label
              if self._sklearn_object._estimator_type == "classifier":
                  # label columns is the desired type for output
-                 outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                 outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                  # rename the output columns
                  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
              self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/svm/linear_svc.py

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
      FeatureSpec,
      ModelSignature,
      _infer_signature,
+     _truncate_data,
      _rename_signature_with_snowflake_identifiers,
  )

@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",

  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]

+ INFER_SIGNATURE_MAX_ROWS = 100
+
  class LinearSVC(BaseTransformer):
      r"""Linear Support Vector Classification
      For more details on this class, see [sklearn.svm.LinearSVC]
@@ -507,7 +510,7 @@ class LinearSVC(BaseTransformer):
          elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
              expected_dtype = "array"
          else:
-             output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+             output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
              # We can only infer the output types from the input types if the following two statemetns are true:
              # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
              # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1166,7 +1169,7 @@ class LinearSVC(BaseTransformer):

          PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]

-         inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+         inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
          outputs: List[BaseFeatureSpec] = []
          if hasattr(self, "predict"):
              # keep mypy happy
@@ -1174,7 +1177,7 @@ class LinearSVC(BaseTransformer):
              # For classifier, the type of predict is the same as the type of label
              if self._sklearn_object._estimator_type == "classifier":
                  # label columns is the desired type for output
-                 outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                 outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                  # rename the output columns
                  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
              self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/svm/linear_svr.py

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
      FeatureSpec,
      ModelSignature,
      _infer_signature,
+     _truncate_data,
      _rename_signature_with_snowflake_identifiers,
  )

@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",

  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]

+ INFER_SIGNATURE_MAX_ROWS = 100
+
  class LinearSVR(BaseTransformer):
      r"""Linear Support Vector Regression
      For more details on this class, see [sklearn.svm.LinearSVR]
@@ -476,7 +479,7 @@ class LinearSVR(BaseTransformer):
          elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
              expected_dtype = "array"
          else:
-             output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+             output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
              # We can only infer the output types from the input types if the following two statemetns are true:
              # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
              # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1133,7 +1136,7 @@ class LinearSVR(BaseTransformer):

          PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]

-         inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+         inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
          outputs: List[BaseFeatureSpec] = []
          if hasattr(self, "predict"):
              # keep mypy happy
@@ -1141,7 +1144,7 @@ class LinearSVR(BaseTransformer):
              # For classifier, the type of predict is the same as the type of label
              if self._sklearn_object._estimator_type == "classifier":
                  # label columns is the desired type for output
-                 outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                 outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                  # rename the output columns
                  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
              self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/svm/nu_svc.py

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
      FeatureSpec,
      ModelSignature,
      _infer_signature,
+     _truncate_data,
      _rename_signature_with_snowflake_identifiers,
  )

@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",

  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]

+ INFER_SIGNATURE_MAX_ROWS = 100
+
  class NuSVC(BaseTransformer):
      r"""Nu-Support Vector Classification
      For more details on this class, see [sklearn.svm.NuSVC]
@@ -506,7 +509,7 @@ class NuSVC(BaseTransformer):
          elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
              expected_dtype = "array"
          else:
-             output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+             output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
              # We can only infer the output types from the input types if the following two statemetns are true:
              # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
              # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1169,7 +1172,7 @@ class NuSVC(BaseTransformer):

          PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]

-         inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+         inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
          outputs: List[BaseFeatureSpec] = []
          if hasattr(self, "predict"):
              # keep mypy happy
@@ -1177,7 +1180,7 @@ class NuSVC(BaseTransformer):
              # For classifier, the type of predict is the same as the type of label
              if self._sklearn_object._estimator_type == "classifier":
                  # label columns is the desired type for output
-                 outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                 outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                  # rename the output columns
                  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
              self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/svm/nu_svr.py

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
      FeatureSpec,
      ModelSignature,
      _infer_signature,
+     _truncate_data,
      _rename_signature_with_snowflake_identifiers,
  )

@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",

  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]

+ INFER_SIGNATURE_MAX_ROWS = 100
+
  class NuSVR(BaseTransformer):
      r"""Nu Support Vector Regression
      For more details on this class, see [sklearn.svm.NuSVR]
@@ -467,7 +470,7 @@ class NuSVR(BaseTransformer):
          elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
              expected_dtype = "array"
          else:
-             output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+             output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
              # We can only infer the output types from the input types if the following two statemetns are true:
              # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
              # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1124,7 +1127,7 @@ class NuSVR(BaseTransformer):

          PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]

-         inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+         inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
          outputs: List[BaseFeatureSpec] = []
          if hasattr(self, "predict"):
              # keep mypy happy
@@ -1132,7 +1135,7 @@ class NuSVR(BaseTransformer):
              # For classifier, the type of predict is the same as the type of label
              if self._sklearn_object._estimator_type == "classifier":
                  # label columns is the desired type for output
-                 outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                 outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                  # rename the output columns
                  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
              self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/svm/svc.py

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
      FeatureSpec,
      ModelSignature,
      _infer_signature,
+     _truncate_data,
      _rename_signature_with_snowflake_identifiers,
  )

@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",

  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]

+ INFER_SIGNATURE_MAX_ROWS = 100
+
  class SVC(BaseTransformer):
      r"""C-Support Vector Classification
      For more details on this class, see [sklearn.svm.SVC]
@@ -511,7 +514,7 @@ class SVC(BaseTransformer):
          elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
              expected_dtype = "array"
          else:
-             output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+             output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
              # We can only infer the output types from the input types if the following two statemetns are true:
              # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
              # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1174,7 +1177,7 @@ class SVC(BaseTransformer):

          PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]

-         inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+         inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
          outputs: List[BaseFeatureSpec] = []
          if hasattr(self, "predict"):
              # keep mypy happy
@@ -1182,7 +1185,7 @@ class SVC(BaseTransformer):
              # For classifier, the type of predict is the same as the type of label
              if self._sklearn_object._estimator_type == "classifier":
                  # label columns is the desired type for output
-                 outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                 outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                  # rename the output columns
                  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
              self._model_signature_dict["predict"] = ModelSignature(