snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. snowflake/ml/_internal/env_utils.py +16 -13
  2. snowflake/ml/_internal/exceptions/modeling_error_messages.py +5 -1
  3. snowflake/ml/_internal/telemetry.py +19 -0
  4. snowflake/ml/feature_store/__init__.py +9 -0
  5. snowflake/ml/feature_store/entity.py +73 -0
  6. snowflake/ml/feature_store/feature_store.py +1657 -0
  7. snowflake/ml/feature_store/feature_view.py +459 -0
  8. snowflake/ml/model/_client/ops/model_ops.py +16 -38
  9. snowflake/ml/model/_client/sql/model.py +1 -7
  10. snowflake/ml/model/_client/sql/model_version.py +20 -15
  11. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +9 -1
  12. snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +12 -2
  14. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +7 -3
  15. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
  16. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
  17. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
  18. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
  19. snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
  20. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
  21. snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
  22. snowflake/ml/model/model_signature.py +72 -16
  23. snowflake/ml/model/type_hints.py +12 -0
  24. snowflake/ml/modeling/_internal/estimator_protocols.py +1 -41
  25. snowflake/ml/modeling/_internal/model_trainer_builder.py +13 -9
  26. snowflake/ml/modeling/_internal/{distributed_hpo_trainer.py → snowpark_implementations/distributed_hpo_trainer.py} +66 -96
  27. snowflake/ml/modeling/_internal/{snowpark_handlers.py → snowpark_implementations/snowpark_handlers.py} +9 -6
  28. snowflake/ml/modeling/_internal/{xgboost_external_memory_trainer.py → snowpark_implementations/xgboost_external_memory_trainer.py} +3 -1
  29. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +19 -3
  30. snowflake/ml/modeling/cluster/affinity_propagation.py +19 -3
  31. snowflake/ml/modeling/cluster/agglomerative_clustering.py +19 -3
  32. snowflake/ml/modeling/cluster/birch.py +19 -3
  33. snowflake/ml/modeling/cluster/bisecting_k_means.py +19 -3
  34. snowflake/ml/modeling/cluster/dbscan.py +19 -3
  35. snowflake/ml/modeling/cluster/feature_agglomeration.py +19 -3
  36. snowflake/ml/modeling/cluster/k_means.py +19 -3
  37. snowflake/ml/modeling/cluster/mean_shift.py +19 -3
  38. snowflake/ml/modeling/cluster/mini_batch_k_means.py +19 -3
  39. snowflake/ml/modeling/cluster/optics.py +19 -3
  40. snowflake/ml/modeling/cluster/spectral_biclustering.py +19 -3
  41. snowflake/ml/modeling/cluster/spectral_clustering.py +19 -3
  42. snowflake/ml/modeling/cluster/spectral_coclustering.py +19 -3
  43. snowflake/ml/modeling/compose/column_transformer.py +19 -3
  44. snowflake/ml/modeling/compose/transformed_target_regressor.py +19 -3
  45. snowflake/ml/modeling/covariance/elliptic_envelope.py +19 -3
  46. snowflake/ml/modeling/covariance/empirical_covariance.py +19 -3
  47. snowflake/ml/modeling/covariance/graphical_lasso.py +19 -3
  48. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +19 -3
  49. snowflake/ml/modeling/covariance/ledoit_wolf.py +19 -3
  50. snowflake/ml/modeling/covariance/min_cov_det.py +19 -3
  51. snowflake/ml/modeling/covariance/oas.py +19 -3
  52. snowflake/ml/modeling/covariance/shrunk_covariance.py +19 -3
  53. snowflake/ml/modeling/decomposition/dictionary_learning.py +19 -3
  54. snowflake/ml/modeling/decomposition/factor_analysis.py +19 -3
  55. snowflake/ml/modeling/decomposition/fast_ica.py +19 -3
  56. snowflake/ml/modeling/decomposition/incremental_pca.py +19 -3
  57. snowflake/ml/modeling/decomposition/kernel_pca.py +19 -3
  58. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +19 -3
  59. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +19 -3
  60. snowflake/ml/modeling/decomposition/pca.py +19 -3
  61. snowflake/ml/modeling/decomposition/sparse_pca.py +19 -3
  62. snowflake/ml/modeling/decomposition/truncated_svd.py +19 -3
  63. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +19 -3
  64. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +19 -3
  65. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +19 -3
  66. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +19 -3
  67. snowflake/ml/modeling/ensemble/bagging_classifier.py +19 -3
  68. snowflake/ml/modeling/ensemble/bagging_regressor.py +19 -3
  69. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +19 -3
  70. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +19 -3
  71. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +19 -3
  72. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +19 -3
  73. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +19 -3
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +19 -3
  75. snowflake/ml/modeling/ensemble/isolation_forest.py +19 -3
  76. snowflake/ml/modeling/ensemble/random_forest_classifier.py +19 -3
  77. snowflake/ml/modeling/ensemble/random_forest_regressor.py +19 -3
  78. snowflake/ml/modeling/ensemble/stacking_regressor.py +19 -3
  79. snowflake/ml/modeling/ensemble/voting_classifier.py +19 -3
  80. snowflake/ml/modeling/ensemble/voting_regressor.py +19 -3
  81. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +19 -3
  82. snowflake/ml/modeling/feature_selection/select_fdr.py +19 -3
  83. snowflake/ml/modeling/feature_selection/select_fpr.py +19 -3
  84. snowflake/ml/modeling/feature_selection/select_fwe.py +19 -3
  85. snowflake/ml/modeling/feature_selection/select_k_best.py +19 -3
  86. snowflake/ml/modeling/feature_selection/select_percentile.py +19 -3
  87. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +19 -3
  88. snowflake/ml/modeling/feature_selection/variance_threshold.py +19 -3
  89. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +19 -3
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +19 -3
  91. snowflake/ml/modeling/impute/iterative_imputer.py +19 -3
  92. snowflake/ml/modeling/impute/knn_imputer.py +19 -3
  93. snowflake/ml/modeling/impute/missing_indicator.py +19 -3
  94. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +19 -3
  95. snowflake/ml/modeling/kernel_approximation/nystroem.py +19 -3
  96. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +19 -3
  97. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +19 -3
  98. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +19 -3
  99. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +19 -3
  100. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +19 -3
  101. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +19 -3
  102. snowflake/ml/modeling/linear_model/ard_regression.py +19 -3
  103. snowflake/ml/modeling/linear_model/bayesian_ridge.py +19 -3
  104. snowflake/ml/modeling/linear_model/elastic_net.py +19 -3
  105. snowflake/ml/modeling/linear_model/elastic_net_cv.py +19 -3
  106. snowflake/ml/modeling/linear_model/gamma_regressor.py +19 -3
  107. snowflake/ml/modeling/linear_model/huber_regressor.py +19 -3
  108. snowflake/ml/modeling/linear_model/lars.py +19 -3
  109. snowflake/ml/modeling/linear_model/lars_cv.py +19 -3
  110. snowflake/ml/modeling/linear_model/lasso.py +19 -3
  111. snowflake/ml/modeling/linear_model/lasso_cv.py +19 -3
  112. snowflake/ml/modeling/linear_model/lasso_lars.py +19 -3
  113. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +19 -3
  114. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +19 -3
  115. snowflake/ml/modeling/linear_model/linear_regression.py +19 -3
  116. snowflake/ml/modeling/linear_model/logistic_regression.py +19 -3
  117. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +19 -3
  118. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +19 -3
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +19 -3
  120. snowflake/ml/modeling/linear_model/multi_task_lasso.py +19 -3
  121. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +19 -3
  122. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +19 -3
  123. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +19 -3
  124. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +19 -3
  125. snowflake/ml/modeling/linear_model/perceptron.py +19 -3
  126. snowflake/ml/modeling/linear_model/poisson_regressor.py +19 -3
  127. snowflake/ml/modeling/linear_model/ransac_regressor.py +19 -3
  128. snowflake/ml/modeling/linear_model/ridge.py +19 -3
  129. snowflake/ml/modeling/linear_model/ridge_classifier.py +19 -3
  130. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +19 -3
  131. snowflake/ml/modeling/linear_model/ridge_cv.py +19 -3
  132. snowflake/ml/modeling/linear_model/sgd_classifier.py +19 -3
  133. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +19 -3
  134. snowflake/ml/modeling/linear_model/sgd_regressor.py +19 -3
  135. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +19 -3
  136. snowflake/ml/modeling/linear_model/tweedie_regressor.py +19 -3
  137. snowflake/ml/modeling/manifold/isomap.py +19 -3
  138. snowflake/ml/modeling/manifold/mds.py +19 -3
  139. snowflake/ml/modeling/manifold/spectral_embedding.py +19 -3
  140. snowflake/ml/modeling/manifold/tsne.py +19 -3
  141. snowflake/ml/modeling/metrics/classification.py +5 -6
  142. snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
  143. snowflake/ml/modeling/metrics/ranking.py +7 -3
  144. snowflake/ml/modeling/metrics/regression.py +6 -3
  145. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +19 -3
  146. snowflake/ml/modeling/mixture/gaussian_mixture.py +19 -3
  147. snowflake/ml/modeling/model_selection/grid_search_cv.py +3 -13
  148. snowflake/ml/modeling/model_selection/randomized_search_cv.py +3 -13
  149. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +19 -3
  150. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +19 -3
  151. snowflake/ml/modeling/multiclass/output_code_classifier.py +19 -3
  152. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +19 -3
  153. snowflake/ml/modeling/naive_bayes/categorical_nb.py +19 -3
  154. snowflake/ml/modeling/naive_bayes/complement_nb.py +19 -3
  155. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +19 -3
  156. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +19 -3
  157. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +19 -3
  158. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +19 -3
  159. snowflake/ml/modeling/neighbors/kernel_density.py +19 -3
  160. snowflake/ml/modeling/neighbors/local_outlier_factor.py +19 -3
  161. snowflake/ml/modeling/neighbors/nearest_centroid.py +19 -3
  162. snowflake/ml/modeling/neighbors/nearest_neighbors.py +19 -3
  163. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +19 -3
  164. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +19 -3
  165. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +19 -3
  166. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +19 -3
  167. snowflake/ml/modeling/neural_network/mlp_classifier.py +19 -3
  168. snowflake/ml/modeling/neural_network/mlp_regressor.py +19 -3
  169. snowflake/ml/modeling/preprocessing/polynomial_features.py +19 -3
  170. snowflake/ml/modeling/semi_supervised/label_propagation.py +19 -3
  171. snowflake/ml/modeling/semi_supervised/label_spreading.py +19 -3
  172. snowflake/ml/modeling/svm/linear_svc.py +19 -3
  173. snowflake/ml/modeling/svm/linear_svr.py +19 -3
  174. snowflake/ml/modeling/svm/nu_svc.py +19 -3
  175. snowflake/ml/modeling/svm/nu_svr.py +19 -3
  176. snowflake/ml/modeling/svm/svc.py +19 -3
  177. snowflake/ml/modeling/svm/svr.py +19 -3
  178. snowflake/ml/modeling/tree/decision_tree_classifier.py +19 -3
  179. snowflake/ml/modeling/tree/decision_tree_regressor.py +19 -3
  180. snowflake/ml/modeling/tree/extra_tree_classifier.py +19 -3
  181. snowflake/ml/modeling/tree/extra_tree_regressor.py +19 -3
  182. snowflake/ml/modeling/xgboost/xgb_classifier.py +19 -3
  183. snowflake/ml/modeling/xgboost/xgb_regressor.py +19 -3
  184. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +19 -3
  185. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +19 -3
  186. snowflake/ml/registry/registry.py +2 -0
  187. snowflake/ml/version.py +1 -1
  188. snowflake_ml_python-1.2.2.dist-info/LICENSE.txt +202 -0
  189. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/METADATA +276 -50
  190. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/RECORD +204 -197
  191. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/WHEEL +2 -1
  192. snowflake_ml_python-1.2.2.dist-info/top_level.txt +1 -0
  193. /snowflake/ml/modeling/_internal/{pandas_trainer.py → local_implementations/pandas_trainer.py} +0 -0
  194. /snowflake/ml/modeling/_internal/{snowpark_trainer.py → snowpark_implementations/snowpark_trainer.py} +0 -0
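Most of the per-estimator churn below is mechanical, but entries 4-7 introduce an entirely new snowflake.ml.feature_store package (entities, feature views, and the store itself). As rough orientation, here is a hedged usage sketch; every constructor and method signature in it is an assumption based on the public snowflake-ml-python preview documentation, not on anything shown in this diff.

from snowflake.snowpark import Session
from snowflake.ml.feature_store import (  # exports assumed from feature_store/__init__.py
    CreationMode,
    Entity,
    FeatureStore,
    FeatureView,
)

connection_parameters: dict = {}  # fill in account, user, password, etc.
session = Session.builder.configs(connection_parameters).create()

# The store is scoped to a database/schema pair; CREATE_IF_NOT_EXIST provisions it.
fs = FeatureStore(
    session=session,
    database="ML_DB",
    name="FS_SCHEMA",
    default_warehouse="ML_WH",
    creation_mode=CreationMode.CREATE_IF_NOT_EXIST,
)

# An entity declares the join keys that feature views are keyed on.
customer = Entity(name="CUSTOMER", join_keys=["CUSTOMER_ID"])
fs.register_entity(customer)

# A feature view wraps a Snowpark DataFrame of feature columns.
feature_df = session.table("RAW_ORDERS").group_by("CUSTOMER_ID").count()
fv = FeatureView(
    name="CUSTOMER_ORDER_COUNTS",
    entities=[customer],
    feature_df=feature_df,
    refresh_freq="1 day",  # managed refresh; omit for a static view
)
fs.register_feature_view(feature_view=fv, version="V1", block=True)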

snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -353,7 +353,7 @@ class GradientBoostingClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If the user passed a Snowpark dataframe during fit, this stores the Snowpark input_cols; otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=GradientBoostingClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=GradientBoostingClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
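The hunks above swap the FitPredictHandlers annotation for TransformerHandlers while the concrete object stays SnowparkHandlers. A minimal sketch of why that assignment type-checks, assuming TransformerHandlers is a structural typing.Protocol (the method shown here is hypothetical; the real protocol lives in estimator_protocols.py, which this release slims down, +1 -41):

from typing import Protocol


class TransformerHandlers(Protocol):
    # Hypothetical member; the actual protocol surface is not shown in this diff.
    def batch_inference(self, dataset: object, inference_method: str) -> object: ...


class SnowparkHandlers:
    """Concrete handler; note there is no inheritance from the protocol."""

    def __init__(self, class_name: str, subproject: str, autogenerated: bool = False) -> None:
        self.class_name = class_name
        self.subproject = subproject
        self.autogenerated = autogenerated

    def batch_inference(self, dataset: object, inference_method: str) -> object:
        return dataset  # placeholder body


# Structural typing: SnowparkHandlers satisfies TransformerHandlers by shape alone,
# so the generated estimators can re-annotate without touching the implementation.
handlers: TransformerHandlers = SnowparkHandlers(
    class_name="GradientBoostingClassifier", subproject="ModelDevelopment", autogenerated=True
)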

@@ -713,6 +713,22 @@ class GradientBoostingClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # For a clustering transformer: if the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # For a decomposition transformer: if the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
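The sixteen added lines above carry the real logic of this release's transform() change, and they recur verbatim in every estimator file that follows. This self-contained sketch restates that decision with a hypothetical FakeEstimator standing in for self._sklearn_object and plain strings standing in for Snowpark types:

from typing import List, Optional


class FakeEstimator:
    """Hypothetical stand-in for the wrapped sklearn object."""

    def __init__(self, n_clusters: Optional[int] = None, n_components: Optional[int] = None) -> None:
        # Only set the attributes the real estimator would actually expose.
        if n_clusters is not None:
            self.n_clusters = n_clusters
        if n_components is not None:
            self.n_components = n_components


def infer_expected_dtype(estimator: object, output_cols: List[str], input_types: List[str]) -> str:
    """Mirrors the branching above; returns "" when no type can be inferred."""
    # Clustering: output column count must match the cluster count, else rows become arrays.
    if hasattr(estimator, "n_clusters") and getattr(estimator, "n_clusters") != len(output_cols):
        return "ARRAY"
    # Decomposition: output column count must match the component count.
    if hasattr(estimator, "n_components") and getattr(estimator, "n_components") != len(output_cols):
        return "ARRAY"
    # Reuse the input type only when all inputs agree and the column counts line up.
    if input_types and all(t == input_types[0] for t in input_types) and len(input_types) == len(output_cols):
        return input_types[0]
    return ""  # caller falls back to a variant/ARRAY column


# A KMeans-like object with 8 clusters but one output column -> "ARRAY".
print(infer_expected_dtype(FakeEstimator(n_clusters=8), ["CLUSTER"], ["FLOAT"] * 4))
# An element-wise transformer with matching columns and uniform types -> "FLOAT".
print(infer_expected_dtype(FakeEstimator(), ["A", "B"], ["FLOAT", "FLOAT"]))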

snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -362,7 +362,7 @@ class GradientBoostingRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If the user passed a Snowpark dataframe during fit, this stores the Snowpark input_cols; otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=GradientBoostingRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=GradientBoostingRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -722,6 +722,22 @@ class GradientBoostingRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # For a clustering transformer: if the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # For a decomposition transformer: if the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -334,7 +334,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If the user passed a Snowpark dataframe during fit, this stores the Snowpark input_cols; otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=HistGradientBoostingClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=HistGradientBoostingClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -694,6 +694,22 @@ class HistGradientBoostingClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # For a clustering transformer: if the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # For a decomposition transformer: if the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -325,7 +325,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If the user passed a Snowpark dataframe during fit, this stores the Snowpark input_cols; otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=HistGradientBoostingRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=HistGradientBoostingRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -685,6 +685,22 @@ class HistGradientBoostingRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # For a clustering transformer: if the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # For a decomposition transformer: if the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/ensemble/isolation_forest.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -225,7 +225,7 @@ class IsolationForest(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If the user passed a Snowpark dataframe during fit, this stores the Snowpark input_cols; otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=IsolationForest.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=IsolationForest.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -585,6 +585,22 @@ class IsolationForest(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # For a clustering transformer: if the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # For a decomposition transformer: if the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/ensemble/random_forest_classifier.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -337,7 +337,7 @@ class RandomForestClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If the user passed a Snowpark dataframe during fit, this stores the Snowpark input_cols; otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=RandomForestClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=RandomForestClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -697,6 +697,22 @@ class RandomForestClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # For a clustering transformer: if the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # For a decomposition transformer: if the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/ensemble/random_forest_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -316,7 +316,7 @@ class RandomForestRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If the user passed a Snowpark dataframe during fit, this stores the Snowpark input_cols; otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=RandomForestRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=RandomForestRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -676,6 +676,22 @@ class RandomForestRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # For a clustering transformer: if the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # For a decomposition transformer: if the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/ensemble/stacking_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -217,7 +217,7 @@ class StackingRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If the user passed a Snowpark dataframe during fit, this stores the Snowpark input_cols; otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=StackingRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=StackingRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -579,6 +579,22 @@ class StackingRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # For a clustering transformer: if the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # For a decomposition transformer: if the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/ensemble/voting_classifier.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -199,7 +199,7 @@ class VotingClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If the user passed a Snowpark dataframe during fit, this stores the Snowpark input_cols; otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=VotingClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=VotingClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -561,6 +561,22 @@ class VotingClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # For a clustering transformer: if the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # For a decomposition transformer: if the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/ensemble/voting_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -181,7 +181,7 @@ class VotingRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If the user passed a Snowpark dataframe during fit, this stores the Snowpark input_cols; otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=VotingRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=VotingRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -543,6 +543,22 @@ class VotingRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # For a clustering transformer: if the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # For a decomposition transformer: if the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/feature_selection/generic_univariate_select.py
@@ -27,7 +27,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -36,7 +36,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -171,7 +171,7 @@ class GenericUnivariateSelect(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If the user passed a Snowpark dataframe during fit, this stores the Snowpark input_cols; otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=GenericUnivariateSelect.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=GenericUnivariateSelect.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -531,6 +531,22 @@ class GenericUnivariateSelect(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # For a clustering transformer: if the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # For a decomposition transformer: if the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/feature_selection/select_fdr.py
@@ -27,7 +27,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -36,7 +36,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -167,7 +167,7 @@ class SelectFdr(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If the user passed a Snowpark dataframe during fit, this stores the Snowpark input_cols; otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=SelectFdr.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SelectFdr.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -527,6 +527,22 @@ class SelectFdr(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # For a clustering transformer: if the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # For a decomposition transformer: if the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",