snowflake_ml_python-1.2.0-py3-none-any.whl → snowflake_ml_python-1.2.2-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (194)
  1. snowflake/ml/_internal/env_utils.py +16 -13
  2. snowflake/ml/_internal/exceptions/modeling_error_messages.py +5 -1
  3. snowflake/ml/_internal/telemetry.py +19 -0
  4. snowflake/ml/feature_store/__init__.py +9 -0
  5. snowflake/ml/feature_store/entity.py +73 -0
  6. snowflake/ml/feature_store/feature_store.py +1657 -0
  7. snowflake/ml/feature_store/feature_view.py +459 -0
  8. snowflake/ml/model/_client/ops/model_ops.py +16 -38
  9. snowflake/ml/model/_client/sql/model.py +1 -7
  10. snowflake/ml/model/_client/sql/model_version.py +20 -15
  11. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +9 -1
  12. snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +12 -2
  14. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +7 -3
  15. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
  16. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
  17. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
  18. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
  19. snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
  20. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
  21. snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
  22. snowflake/ml/model/model_signature.py +72 -16
  23. snowflake/ml/model/type_hints.py +12 -0
  24. snowflake/ml/modeling/_internal/estimator_protocols.py +1 -41
  25. snowflake/ml/modeling/_internal/model_trainer_builder.py +13 -9
  26. snowflake/ml/modeling/_internal/{distributed_hpo_trainer.py → snowpark_implementations/distributed_hpo_trainer.py} +66 -96
  27. snowflake/ml/modeling/_internal/{snowpark_handlers.py → snowpark_implementations/snowpark_handlers.py} +9 -6
  28. snowflake/ml/modeling/_internal/{xgboost_external_memory_trainer.py → snowpark_implementations/xgboost_external_memory_trainer.py} +3 -1
  29. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +19 -3
  30. snowflake/ml/modeling/cluster/affinity_propagation.py +19 -3
  31. snowflake/ml/modeling/cluster/agglomerative_clustering.py +19 -3
  32. snowflake/ml/modeling/cluster/birch.py +19 -3
  33. snowflake/ml/modeling/cluster/bisecting_k_means.py +19 -3
  34. snowflake/ml/modeling/cluster/dbscan.py +19 -3
  35. snowflake/ml/modeling/cluster/feature_agglomeration.py +19 -3
  36. snowflake/ml/modeling/cluster/k_means.py +19 -3
  37. snowflake/ml/modeling/cluster/mean_shift.py +19 -3
  38. snowflake/ml/modeling/cluster/mini_batch_k_means.py +19 -3
  39. snowflake/ml/modeling/cluster/optics.py +19 -3
  40. snowflake/ml/modeling/cluster/spectral_biclustering.py +19 -3
  41. snowflake/ml/modeling/cluster/spectral_clustering.py +19 -3
  42. snowflake/ml/modeling/cluster/spectral_coclustering.py +19 -3
  43. snowflake/ml/modeling/compose/column_transformer.py +19 -3
  44. snowflake/ml/modeling/compose/transformed_target_regressor.py +19 -3
  45. snowflake/ml/modeling/covariance/elliptic_envelope.py +19 -3
  46. snowflake/ml/modeling/covariance/empirical_covariance.py +19 -3
  47. snowflake/ml/modeling/covariance/graphical_lasso.py +19 -3
  48. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +19 -3
  49. snowflake/ml/modeling/covariance/ledoit_wolf.py +19 -3
  50. snowflake/ml/modeling/covariance/min_cov_det.py +19 -3
  51. snowflake/ml/modeling/covariance/oas.py +19 -3
  52. snowflake/ml/modeling/covariance/shrunk_covariance.py +19 -3
  53. snowflake/ml/modeling/decomposition/dictionary_learning.py +19 -3
  54. snowflake/ml/modeling/decomposition/factor_analysis.py +19 -3
  55. snowflake/ml/modeling/decomposition/fast_ica.py +19 -3
  56. snowflake/ml/modeling/decomposition/incremental_pca.py +19 -3
  57. snowflake/ml/modeling/decomposition/kernel_pca.py +19 -3
  58. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +19 -3
  59. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +19 -3
  60. snowflake/ml/modeling/decomposition/pca.py +19 -3
  61. snowflake/ml/modeling/decomposition/sparse_pca.py +19 -3
  62. snowflake/ml/modeling/decomposition/truncated_svd.py +19 -3
  63. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +19 -3
  64. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +19 -3
  65. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +19 -3
  66. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +19 -3
  67. snowflake/ml/modeling/ensemble/bagging_classifier.py +19 -3
  68. snowflake/ml/modeling/ensemble/bagging_regressor.py +19 -3
  69. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +19 -3
  70. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +19 -3
  71. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +19 -3
  72. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +19 -3
  73. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +19 -3
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +19 -3
  75. snowflake/ml/modeling/ensemble/isolation_forest.py +19 -3
  76. snowflake/ml/modeling/ensemble/random_forest_classifier.py +19 -3
  77. snowflake/ml/modeling/ensemble/random_forest_regressor.py +19 -3
  78. snowflake/ml/modeling/ensemble/stacking_regressor.py +19 -3
  79. snowflake/ml/modeling/ensemble/voting_classifier.py +19 -3
  80. snowflake/ml/modeling/ensemble/voting_regressor.py +19 -3
  81. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +19 -3
  82. snowflake/ml/modeling/feature_selection/select_fdr.py +19 -3
  83. snowflake/ml/modeling/feature_selection/select_fpr.py +19 -3
  84. snowflake/ml/modeling/feature_selection/select_fwe.py +19 -3
  85. snowflake/ml/modeling/feature_selection/select_k_best.py +19 -3
  86. snowflake/ml/modeling/feature_selection/select_percentile.py +19 -3
  87. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +19 -3
  88. snowflake/ml/modeling/feature_selection/variance_threshold.py +19 -3
  89. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +19 -3
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +19 -3
  91. snowflake/ml/modeling/impute/iterative_imputer.py +19 -3
  92. snowflake/ml/modeling/impute/knn_imputer.py +19 -3
  93. snowflake/ml/modeling/impute/missing_indicator.py +19 -3
  94. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +19 -3
  95. snowflake/ml/modeling/kernel_approximation/nystroem.py +19 -3
  96. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +19 -3
  97. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +19 -3
  98. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +19 -3
  99. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +19 -3
  100. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +19 -3
  101. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +19 -3
  102. snowflake/ml/modeling/linear_model/ard_regression.py +19 -3
  103. snowflake/ml/modeling/linear_model/bayesian_ridge.py +19 -3
  104. snowflake/ml/modeling/linear_model/elastic_net.py +19 -3
  105. snowflake/ml/modeling/linear_model/elastic_net_cv.py +19 -3
  106. snowflake/ml/modeling/linear_model/gamma_regressor.py +19 -3
  107. snowflake/ml/modeling/linear_model/huber_regressor.py +19 -3
  108. snowflake/ml/modeling/linear_model/lars.py +19 -3
  109. snowflake/ml/modeling/linear_model/lars_cv.py +19 -3
  110. snowflake/ml/modeling/linear_model/lasso.py +19 -3
  111. snowflake/ml/modeling/linear_model/lasso_cv.py +19 -3
  112. snowflake/ml/modeling/linear_model/lasso_lars.py +19 -3
  113. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +19 -3
  114. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +19 -3
  115. snowflake/ml/modeling/linear_model/linear_regression.py +19 -3
  116. snowflake/ml/modeling/linear_model/logistic_regression.py +19 -3
  117. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +19 -3
  118. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +19 -3
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +19 -3
  120. snowflake/ml/modeling/linear_model/multi_task_lasso.py +19 -3
  121. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +19 -3
  122. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +19 -3
  123. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +19 -3
  124. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +19 -3
  125. snowflake/ml/modeling/linear_model/perceptron.py +19 -3
  126. snowflake/ml/modeling/linear_model/poisson_regressor.py +19 -3
  127. snowflake/ml/modeling/linear_model/ransac_regressor.py +19 -3
  128. snowflake/ml/modeling/linear_model/ridge.py +19 -3
  129. snowflake/ml/modeling/linear_model/ridge_classifier.py +19 -3
  130. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +19 -3
  131. snowflake/ml/modeling/linear_model/ridge_cv.py +19 -3
  132. snowflake/ml/modeling/linear_model/sgd_classifier.py +19 -3
  133. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +19 -3
  134. snowflake/ml/modeling/linear_model/sgd_regressor.py +19 -3
  135. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +19 -3
  136. snowflake/ml/modeling/linear_model/tweedie_regressor.py +19 -3
  137. snowflake/ml/modeling/manifold/isomap.py +19 -3
  138. snowflake/ml/modeling/manifold/mds.py +19 -3
  139. snowflake/ml/modeling/manifold/spectral_embedding.py +19 -3
  140. snowflake/ml/modeling/manifold/tsne.py +19 -3
  141. snowflake/ml/modeling/metrics/classification.py +5 -6
  142. snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
  143. snowflake/ml/modeling/metrics/ranking.py +7 -3
  144. snowflake/ml/modeling/metrics/regression.py +6 -3
  145. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +19 -3
  146. snowflake/ml/modeling/mixture/gaussian_mixture.py +19 -3
  147. snowflake/ml/modeling/model_selection/grid_search_cv.py +3 -13
  148. snowflake/ml/modeling/model_selection/randomized_search_cv.py +3 -13
  149. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +19 -3
  150. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +19 -3
  151. snowflake/ml/modeling/multiclass/output_code_classifier.py +19 -3
  152. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +19 -3
  153. snowflake/ml/modeling/naive_bayes/categorical_nb.py +19 -3
  154. snowflake/ml/modeling/naive_bayes/complement_nb.py +19 -3
  155. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +19 -3
  156. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +19 -3
  157. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +19 -3
  158. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +19 -3
  159. snowflake/ml/modeling/neighbors/kernel_density.py +19 -3
  160. snowflake/ml/modeling/neighbors/local_outlier_factor.py +19 -3
  161. snowflake/ml/modeling/neighbors/nearest_centroid.py +19 -3
  162. snowflake/ml/modeling/neighbors/nearest_neighbors.py +19 -3
  163. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +19 -3
  164. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +19 -3
  165. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +19 -3
  166. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +19 -3
  167. snowflake/ml/modeling/neural_network/mlp_classifier.py +19 -3
  168. snowflake/ml/modeling/neural_network/mlp_regressor.py +19 -3
  169. snowflake/ml/modeling/preprocessing/polynomial_features.py +19 -3
  170. snowflake/ml/modeling/semi_supervised/label_propagation.py +19 -3
  171. snowflake/ml/modeling/semi_supervised/label_spreading.py +19 -3
  172. snowflake/ml/modeling/svm/linear_svc.py +19 -3
  173. snowflake/ml/modeling/svm/linear_svr.py +19 -3
  174. snowflake/ml/modeling/svm/nu_svc.py +19 -3
  175. snowflake/ml/modeling/svm/nu_svr.py +19 -3
  176. snowflake/ml/modeling/svm/svc.py +19 -3
  177. snowflake/ml/modeling/svm/svr.py +19 -3
  178. snowflake/ml/modeling/tree/decision_tree_classifier.py +19 -3
  179. snowflake/ml/modeling/tree/decision_tree_regressor.py +19 -3
  180. snowflake/ml/modeling/tree/extra_tree_classifier.py +19 -3
  181. snowflake/ml/modeling/tree/extra_tree_regressor.py +19 -3
  182. snowflake/ml/modeling/xgboost/xgb_classifier.py +19 -3
  183. snowflake/ml/modeling/xgboost/xgb_regressor.py +19 -3
  184. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +19 -3
  185. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +19 -3
  186. snowflake/ml/registry/registry.py +2 -0
  187. snowflake/ml/version.py +1 -1
  188. snowflake_ml_python-1.2.2.dist-info/LICENSE.txt +202 -0
  189. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/METADATA +276 -50
  190. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/RECORD +204 -197
  191. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/WHEEL +2 -1
  192. snowflake_ml_python-1.2.2.dist-info/top_level.txt +1 -0
  193. /snowflake/ml/modeling/_internal/{pandas_trainer.py → local_implementations/pandas_trainer.py} +0 -0
  194. /snowflake/ml/modeling/_internal/{snowpark_trainer.py → snowpark_implementations/snowpark_trainer.py} +0 -0
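The largest addition in this release is the new snowflake/ml/feature_store package (entries 4-7 above). A minimal usage sketch follows; the FeatureStore, Entity, and FeatureView names come from the new module paths, but every constructor argument and method shown is an assumption inferred from the file list, not an API confirmed by this diff.

# Hypothetical sketch of the feature store API added in 1.2.2; all
# signatures below are assumptions and may differ from the released package.
from snowflake.ml.feature_store import Entity, FeatureStore, FeatureView
from snowflake.snowpark import Session

connection_parameters = {"account": "...", "user": "...", "password": "..."}
session = Session.builder.configs(connection_parameters).create()

fs = FeatureStore(                # assumed constructor arguments
    session=session,
    database="ML_DB",
    name="MY_FEATURE_STORE",
    default_warehouse="ML_WH",
)

# An Entity groups feature views by their join keys (assumed signature).
customer = Entity(name="CUSTOMER", join_keys=["CUSTOMER_ID"])
fs.register_entity(customer)

# A FeatureView wraps a Snowpark DataFrame of computed features (assumed signature).
features_df = session.table("RAW_TRANSACTIONS").group_by("CUSTOMER_ID").count()
fv = FeatureView(name="TXN_COUNTS", entities=[customer], feature_df=features_df)
fs.register_feature_view(feature_view=fv, version="V1")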
snowflake/ml/modeling/preprocessing/polynomial_features.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -184,7 +184,7 @@ class PolynomialFeatures(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=PolynomialFeatures.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=PolynomialFeatures.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -544,6 +544,22 @@ class PolynomialFeatures(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
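The @@ -544 hunk above adds a fallback that infers the transform() output column type when the codegen factory could not assign one; the identical block is repeated in every autogenerated wrapper below. To make the decision rule easier to follow, here is a minimal standalone sketch; infer_expected_dtype and its input_types argument are illustrative stand-ins, not the library's _infer_signature machinery.

from typing import List

def infer_expected_dtype(sklearn_obj: object, input_types: List[str], output_cols: List[str]) -> str:
    """Standalone restatement of the fallback added in the @@ -544 hunk.

    `input_types` stands in for the Snowpark types that the library derives via
    `_infer_signature` and converts with `convert_sp_to_sf_type`; returning ""
    means "fall back to VARIANT".
    """
    # Clustering transformers: unless output_cols matches n_clusters one-to-one,
    # each row's values must be packed into a single ARRAY column.
    if hasattr(sklearn_obj, "n_clusters") and sklearn_obj.n_clusters != len(output_cols):
        return "ARRAY"
    # Decomposition transformers: same reasoning, keyed on n_components.
    if hasattr(sklearn_obj, "n_components") and sklearn_obj.n_components != len(output_cols):
        return "ARRAY"
    # Otherwise the output type can be copied from the inputs, but only when
    # 1) all input types agree (one type must describe every output column), and
    # 2) inputs and outputs line up one-to-one (otherwise expect an ARRAY).
    if input_types and all(t == input_types[0] for t in input_types) and len(input_types) == len(output_cols):
        return input_types[0]
    return ""

# Example: a PCA-like object reducing 3 inputs to 2 components packs rows into ARRAYs,
# while a 1:1 transform over uniform DOUBLE inputs keeps the scalar type.
class FakePCA:
    n_components = 2

print(infer_expected_dtype(FakePCA(), ["DOUBLE", "DOUBLE", "DOUBLE"], ["OUT1"]))  # ARRAY
print(infer_expected_dtype(object(), ["DOUBLE", "DOUBLE"], ["OUT1", "OUT2"]))     # DOUBLE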
snowflake/ml/modeling/semi_supervised/label_propagation.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -190,7 +190,7 @@ class LabelPropagation(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LabelPropagation.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LabelPropagation.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -550,6 +550,22 @@ class LabelPropagation(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/semi_supervised/label_spreading.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -199,7 +199,7 @@ class LabelSpreading(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LabelSpreading.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LabelSpreading.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -559,6 +559,22 @@ class LabelSpreading(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/svm/linear_svc.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -255,7 +255,7 @@ class LinearSVC(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LinearSVC.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LinearSVC.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -615,6 +615,22 @@ class LinearSVC(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/svm/linear_svr.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -227,7 +227,7 @@ class LinearSVR(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LinearSVR.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LinearSVR.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -587,6 +587,22 @@ class LinearSVR(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/svm/nu_svc.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -261,7 +261,7 @@ class NuSVC(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=NuSVC.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=NuSVC.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -621,6 +621,22 @@ class NuSVC(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/svm/nu_svr.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -222,7 +222,7 @@ class NuSVR(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=NuSVR.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=NuSVR.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -582,6 +582,22 @@ class NuSVR(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/svm/svc.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -264,7 +264,7 @@ class SVC(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=SVC.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SVC.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -624,6 +624,22 @@ class SVC(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/svm/svr.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -225,7 +225,7 @@ class SVR(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=SVR.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SVR.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -585,6 +585,22 @@ class SVR(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/tree/decision_tree_classifier.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -292,7 +292,7 @@ class DecisionTreeClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=DecisionTreeClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=DecisionTreeClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -652,6 +652,22 @@ class DecisionTreeClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/tree/decision_tree_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -274,7 +274,7 @@ class DecisionTreeRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=DecisionTreeRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=DecisionTreeRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -634,6 +634,22 @@ class DecisionTreeRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/tree/extra_tree_classifier.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -284,7 +284,7 @@ class ExtraTreeClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=ExtraTreeClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=ExtraTreeClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -644,6 +644,22 @@ class ExtraTreeClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",