snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff compares the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
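For reference, moving between the two versions compared here is a standard upgrade in a pip-managed environment: pip install --upgrade snowflake-ml-python==1.2.2.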
Files changed (194)
  1. snowflake/ml/_internal/env_utils.py +16 -13
  2. snowflake/ml/_internal/exceptions/modeling_error_messages.py +5 -1
  3. snowflake/ml/_internal/telemetry.py +19 -0
  4. snowflake/ml/feature_store/__init__.py +9 -0
  5. snowflake/ml/feature_store/entity.py +73 -0
  6. snowflake/ml/feature_store/feature_store.py +1657 -0
  7. snowflake/ml/feature_store/feature_view.py +459 -0
  8. snowflake/ml/model/_client/ops/model_ops.py +16 -38
  9. snowflake/ml/model/_client/sql/model.py +1 -7
  10. snowflake/ml/model/_client/sql/model_version.py +20 -15
  11. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +9 -1
  12. snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +12 -2
  14. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +7 -3
  15. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
  16. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
  17. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
  18. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
  19. snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
  20. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
  21. snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
  22. snowflake/ml/model/model_signature.py +72 -16
  23. snowflake/ml/model/type_hints.py +12 -0
  24. snowflake/ml/modeling/_internal/estimator_protocols.py +1 -41
  25. snowflake/ml/modeling/_internal/model_trainer_builder.py +13 -9
  26. snowflake/ml/modeling/_internal/{distributed_hpo_trainer.py → snowpark_implementations/distributed_hpo_trainer.py} +66 -96
  27. snowflake/ml/modeling/_internal/{snowpark_handlers.py → snowpark_implementations/snowpark_handlers.py} +9 -6
  28. snowflake/ml/modeling/_internal/{xgboost_external_memory_trainer.py → snowpark_implementations/xgboost_external_memory_trainer.py} +3 -1
  29. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +19 -3
  30. snowflake/ml/modeling/cluster/affinity_propagation.py +19 -3
  31. snowflake/ml/modeling/cluster/agglomerative_clustering.py +19 -3
  32. snowflake/ml/modeling/cluster/birch.py +19 -3
  33. snowflake/ml/modeling/cluster/bisecting_k_means.py +19 -3
  34. snowflake/ml/modeling/cluster/dbscan.py +19 -3
  35. snowflake/ml/modeling/cluster/feature_agglomeration.py +19 -3
  36. snowflake/ml/modeling/cluster/k_means.py +19 -3
  37. snowflake/ml/modeling/cluster/mean_shift.py +19 -3
  38. snowflake/ml/modeling/cluster/mini_batch_k_means.py +19 -3
  39. snowflake/ml/modeling/cluster/optics.py +19 -3
  40. snowflake/ml/modeling/cluster/spectral_biclustering.py +19 -3
  41. snowflake/ml/modeling/cluster/spectral_clustering.py +19 -3
  42. snowflake/ml/modeling/cluster/spectral_coclustering.py +19 -3
  43. snowflake/ml/modeling/compose/column_transformer.py +19 -3
  44. snowflake/ml/modeling/compose/transformed_target_regressor.py +19 -3
  45. snowflake/ml/modeling/covariance/elliptic_envelope.py +19 -3
  46. snowflake/ml/modeling/covariance/empirical_covariance.py +19 -3
  47. snowflake/ml/modeling/covariance/graphical_lasso.py +19 -3
  48. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +19 -3
  49. snowflake/ml/modeling/covariance/ledoit_wolf.py +19 -3
  50. snowflake/ml/modeling/covariance/min_cov_det.py +19 -3
  51. snowflake/ml/modeling/covariance/oas.py +19 -3
  52. snowflake/ml/modeling/covariance/shrunk_covariance.py +19 -3
  53. snowflake/ml/modeling/decomposition/dictionary_learning.py +19 -3
  54. snowflake/ml/modeling/decomposition/factor_analysis.py +19 -3
  55. snowflake/ml/modeling/decomposition/fast_ica.py +19 -3
  56. snowflake/ml/modeling/decomposition/incremental_pca.py +19 -3
  57. snowflake/ml/modeling/decomposition/kernel_pca.py +19 -3
  58. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +19 -3
  59. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +19 -3
  60. snowflake/ml/modeling/decomposition/pca.py +19 -3
  61. snowflake/ml/modeling/decomposition/sparse_pca.py +19 -3
  62. snowflake/ml/modeling/decomposition/truncated_svd.py +19 -3
  63. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +19 -3
  64. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +19 -3
  65. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +19 -3
  66. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +19 -3
  67. snowflake/ml/modeling/ensemble/bagging_classifier.py +19 -3
  68. snowflake/ml/modeling/ensemble/bagging_regressor.py +19 -3
  69. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +19 -3
  70. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +19 -3
  71. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +19 -3
  72. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +19 -3
  73. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +19 -3
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +19 -3
  75. snowflake/ml/modeling/ensemble/isolation_forest.py +19 -3
  76. snowflake/ml/modeling/ensemble/random_forest_classifier.py +19 -3
  77. snowflake/ml/modeling/ensemble/random_forest_regressor.py +19 -3
  78. snowflake/ml/modeling/ensemble/stacking_regressor.py +19 -3
  79. snowflake/ml/modeling/ensemble/voting_classifier.py +19 -3
  80. snowflake/ml/modeling/ensemble/voting_regressor.py +19 -3
  81. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +19 -3
  82. snowflake/ml/modeling/feature_selection/select_fdr.py +19 -3
  83. snowflake/ml/modeling/feature_selection/select_fpr.py +19 -3
  84. snowflake/ml/modeling/feature_selection/select_fwe.py +19 -3
  85. snowflake/ml/modeling/feature_selection/select_k_best.py +19 -3
  86. snowflake/ml/modeling/feature_selection/select_percentile.py +19 -3
  87. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +19 -3
  88. snowflake/ml/modeling/feature_selection/variance_threshold.py +19 -3
  89. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +19 -3
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +19 -3
  91. snowflake/ml/modeling/impute/iterative_imputer.py +19 -3
  92. snowflake/ml/modeling/impute/knn_imputer.py +19 -3
  93. snowflake/ml/modeling/impute/missing_indicator.py +19 -3
  94. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +19 -3
  95. snowflake/ml/modeling/kernel_approximation/nystroem.py +19 -3
  96. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +19 -3
  97. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +19 -3
  98. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +19 -3
  99. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +19 -3
  100. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +19 -3
  101. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +19 -3
  102. snowflake/ml/modeling/linear_model/ard_regression.py +19 -3
  103. snowflake/ml/modeling/linear_model/bayesian_ridge.py +19 -3
  104. snowflake/ml/modeling/linear_model/elastic_net.py +19 -3
  105. snowflake/ml/modeling/linear_model/elastic_net_cv.py +19 -3
  106. snowflake/ml/modeling/linear_model/gamma_regressor.py +19 -3
  107. snowflake/ml/modeling/linear_model/huber_regressor.py +19 -3
  108. snowflake/ml/modeling/linear_model/lars.py +19 -3
  109. snowflake/ml/modeling/linear_model/lars_cv.py +19 -3
  110. snowflake/ml/modeling/linear_model/lasso.py +19 -3
  111. snowflake/ml/modeling/linear_model/lasso_cv.py +19 -3
  112. snowflake/ml/modeling/linear_model/lasso_lars.py +19 -3
  113. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +19 -3
  114. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +19 -3
  115. snowflake/ml/modeling/linear_model/linear_regression.py +19 -3
  116. snowflake/ml/modeling/linear_model/logistic_regression.py +19 -3
  117. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +19 -3
  118. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +19 -3
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +19 -3
  120. snowflake/ml/modeling/linear_model/multi_task_lasso.py +19 -3
  121. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +19 -3
  122. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +19 -3
  123. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +19 -3
  124. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +19 -3
  125. snowflake/ml/modeling/linear_model/perceptron.py +19 -3
  126. snowflake/ml/modeling/linear_model/poisson_regressor.py +19 -3
  127. snowflake/ml/modeling/linear_model/ransac_regressor.py +19 -3
  128. snowflake/ml/modeling/linear_model/ridge.py +19 -3
  129. snowflake/ml/modeling/linear_model/ridge_classifier.py +19 -3
  130. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +19 -3
  131. snowflake/ml/modeling/linear_model/ridge_cv.py +19 -3
  132. snowflake/ml/modeling/linear_model/sgd_classifier.py +19 -3
  133. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +19 -3
  134. snowflake/ml/modeling/linear_model/sgd_regressor.py +19 -3
  135. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +19 -3
  136. snowflake/ml/modeling/linear_model/tweedie_regressor.py +19 -3
  137. snowflake/ml/modeling/manifold/isomap.py +19 -3
  138. snowflake/ml/modeling/manifold/mds.py +19 -3
  139. snowflake/ml/modeling/manifold/spectral_embedding.py +19 -3
  140. snowflake/ml/modeling/manifold/tsne.py +19 -3
  141. snowflake/ml/modeling/metrics/classification.py +5 -6
  142. snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
  143. snowflake/ml/modeling/metrics/ranking.py +7 -3
  144. snowflake/ml/modeling/metrics/regression.py +6 -3
  145. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +19 -3
  146. snowflake/ml/modeling/mixture/gaussian_mixture.py +19 -3
  147. snowflake/ml/modeling/model_selection/grid_search_cv.py +3 -13
  148. snowflake/ml/modeling/model_selection/randomized_search_cv.py +3 -13
  149. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +19 -3
  150. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +19 -3
  151. snowflake/ml/modeling/multiclass/output_code_classifier.py +19 -3
  152. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +19 -3
  153. snowflake/ml/modeling/naive_bayes/categorical_nb.py +19 -3
  154. snowflake/ml/modeling/naive_bayes/complement_nb.py +19 -3
  155. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +19 -3
  156. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +19 -3
  157. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +19 -3
  158. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +19 -3
  159. snowflake/ml/modeling/neighbors/kernel_density.py +19 -3
  160. snowflake/ml/modeling/neighbors/local_outlier_factor.py +19 -3
  161. snowflake/ml/modeling/neighbors/nearest_centroid.py +19 -3
  162. snowflake/ml/modeling/neighbors/nearest_neighbors.py +19 -3
  163. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +19 -3
  164. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +19 -3
  165. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +19 -3
  166. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +19 -3
  167. snowflake/ml/modeling/neural_network/mlp_classifier.py +19 -3
  168. snowflake/ml/modeling/neural_network/mlp_regressor.py +19 -3
  169. snowflake/ml/modeling/preprocessing/polynomial_features.py +19 -3
  170. snowflake/ml/modeling/semi_supervised/label_propagation.py +19 -3
  171. snowflake/ml/modeling/semi_supervised/label_spreading.py +19 -3
  172. snowflake/ml/modeling/svm/linear_svc.py +19 -3
  173. snowflake/ml/modeling/svm/linear_svr.py +19 -3
  174. snowflake/ml/modeling/svm/nu_svc.py +19 -3
  175. snowflake/ml/modeling/svm/nu_svr.py +19 -3
  176. snowflake/ml/modeling/svm/svc.py +19 -3
  177. snowflake/ml/modeling/svm/svr.py +19 -3
  178. snowflake/ml/modeling/tree/decision_tree_classifier.py +19 -3
  179. snowflake/ml/modeling/tree/decision_tree_regressor.py +19 -3
  180. snowflake/ml/modeling/tree/extra_tree_classifier.py +19 -3
  181. snowflake/ml/modeling/tree/extra_tree_regressor.py +19 -3
  182. snowflake/ml/modeling/xgboost/xgb_classifier.py +19 -3
  183. snowflake/ml/modeling/xgboost/xgb_regressor.py +19 -3
  184. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +19 -3
  185. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +19 -3
  186. snowflake/ml/registry/registry.py +2 -0
  187. snowflake/ml/version.py +1 -1
  188. snowflake_ml_python-1.2.2.dist-info/LICENSE.txt +202 -0
  189. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/METADATA +276 -50
  190. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/RECORD +204 -197
  191. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/WHEEL +2 -1
  192. snowflake_ml_python-1.2.2.dist-info/top_level.txt +1 -0
  193. /snowflake/ml/modeling/_internal/{pandas_trainer.py → local_implementations/pandas_trainer.py} +0 -0
  194. /snowflake/ml/modeling/_internal/{snowpark_trainer.py → snowpark_implementations/snowpark_trainer.py} +0 -0
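The headline addition in 1.2.2 is the new snowflake.ml.feature_store package (items 4–7 above: entity.py, feature_view.py, and the 1,657-line feature_store.py). As a rough orientation, the sketch below shows how the three new classes fit together. It is reconstructed from the public Feature Store documentation for this release line, so treat the exact parameter names (creation_mode, refresh_freq, version, block) as assumptions to verify against the 1.2.2 API, and `session` as an already-connected Snowpark Session:

    from snowflake.ml.feature_store import (  # package introduced in this release
        CreationMode,
        Entity,
        FeatureStore,
        FeatureView,
    )

    # Assumed: `session` is an existing snowflake.snowpark.Session; the database,
    # warehouse, and object names below are hypothetical.
    fs = FeatureStore(
        session=session,
        database="ML_DB",
        name="MY_FEATURE_STORE",
        default_warehouse="MY_WH",
        creation_mode=CreationMode.CREATE_IF_NOT_EXIST,
    )

    # An Entity names the join keys that feature views are keyed on.
    customer = Entity(name="CUSTOMER", join_keys=["CUSTOMER_ID"])
    fs.register_entity(customer)

    # A FeatureView wraps a Snowpark DataFrame of features for those keys.
    fv = FeatureView(
        name="CUSTOMER_FEATURES",
        entities=[customer],
        feature_df=session.table("RAW_ORDERS").group_by("CUSTOMER_ID").count(),
        refresh_freq="1 day",  # assumed parameter name
    )
    fs.register_feature_view(feature_view=fv, version="V1", block=True)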
--- a/snowflake/ml/modeling/feature_selection/select_fpr.py
+++ b/snowflake/ml/modeling/feature_selection/select_fpr.py
@@ -27,7 +27,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -36,7 +36,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -167,7 +167,7 @@ class SelectFpr(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=SelectFpr.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SelectFpr.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -527,6 +527,22 @@ class SelectFpr(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
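The largest repeated change in this release is the sixteen-line fallback added to SelectFpr's transform() above, then verbatim to every autogenerated estimator that follows. Stripped of the class plumbing, the decision rule is small. The sketch below is an illustrative distillation, not library code: the function and parameter names are invented, and an empty string means the type is left undetermined, exactly as in the generated code.

    from typing import List, Optional

    def infer_expected_dtype(
        input_col_types: List[str],          # Snowflake types of the input columns, e.g. ["FLOAT", "FLOAT"]
        n_output_cols: int,
        n_clusters: Optional[int] = None,    # set when the wrapped estimator exposes `n_clusters`
        n_components: Optional[int] = None,  # set when the wrapped estimator exposes `n_components`
    ) -> str:
        """Return the Snowflake column type to expect, or "" if undetermined."""
        # Clustering transforms whose cluster count differs from the number of
        # output columns pack each row into a list, so the column is an ARRAY.
        if n_clusters is not None and n_clusters != n_output_cols:
            return "ARRAY"
        # The same reasoning applies to decomposition transforms and `n_components`.
        if n_components is not None and n_components != n_output_cols:
            return "ARRAY"
        # A scalar type carries through only when every input column shares one
        # type and the input and output column counts line up.
        if input_col_types and all(t == input_col_types[0] for t in input_col_types) \
                and len(input_col_types) == n_output_cols:
            return input_col_types[0]
        return ""  # leave undetermined, as the generated code does

    assert infer_expected_dtype(["FLOAT", "FLOAT"], 2) == "FLOAT"
    assert infer_expected_dtype(["FLOAT", "VARCHAR"], 2) == ""       # mixed input types
    assert infer_expected_dtype(["FLOAT"] * 4, 2, n_clusters=4) == "ARRAY"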
--- a/snowflake/ml/modeling/feature_selection/select_fwe.py
+++ b/snowflake/ml/modeling/feature_selection/select_fwe.py
@@ -27,7 +27,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -36,7 +36,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -167,7 +167,7 @@ class SelectFwe(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=SelectFwe.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SelectFwe.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -527,6 +527,22 @@ class SelectFwe(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/feature_selection/select_k_best.py
+++ b/snowflake/ml/modeling/feature_selection/select_k_best.py
@@ -27,7 +27,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -36,7 +36,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -168,7 +168,7 @@ class SelectKBest(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=SelectKBest.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SelectKBest.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -528,6 +528,22 @@ class SelectKBest(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/feature_selection/select_percentile.py
+++ b/snowflake/ml/modeling/feature_selection/select_percentile.py
@@ -27,7 +27,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -36,7 +36,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -167,7 +167,7 @@ class SelectPercentile(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=SelectPercentile.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SelectPercentile.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -527,6 +527,22 @@ class SelectPercentile(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/feature_selection/sequential_feature_selector.py
+++ b/snowflake/ml/modeling/feature_selection/sequential_feature_selector.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -225,7 +225,7 @@ class SequentialFeatureSelector(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=SequentialFeatureSelector.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SequentialFeatureSelector.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -585,6 +585,22 @@ class SequentialFeatureSelector(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/feature_selection/variance_threshold.py
+++ b/snowflake/ml/modeling/feature_selection/variance_threshold.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -158,7 +158,7 @@ class VarianceThreshold(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=VarianceThreshold.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=VarianceThreshold.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -518,6 +518,22 @@ class VarianceThreshold(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py
+++ b/snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -253,7 +253,7 @@ class GaussianProcessClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=GaussianProcessClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=GaussianProcessClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -613,6 +613,22 @@ class GaussianProcessClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py
+++ b/snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -244,7 +244,7 @@ class GaussianProcessRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=GaussianProcessRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=GaussianProcessRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -604,6 +604,22 @@ class GaussianProcessRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/impute/iterative_imputer.py
+++ b/snowflake/ml/modeling/impute/iterative_imputer.py
@@ -27,7 +27,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -36,7 +36,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -286,7 +286,7 @@ class IterativeImputer(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=IterativeImputer.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=IterativeImputer.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -646,6 +646,22 @@ class IterativeImputer(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/impute/knn_imputer.py
+++ b/snowflake/ml/modeling/impute/knn_imputer.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -212,7 +212,7 @@ class KNNImputer(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=KNNImputer.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=KNNImputer.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -572,6 +572,22 @@ class KNNImputer(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/impute/missing_indicator.py
+++ b/snowflake/ml/modeling/impute/missing_indicator.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -186,7 +186,7 @@ class MissingIndicator(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=MissingIndicator.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MissingIndicator.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -546,6 +546,22 @@ class MissingIndicator(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py
+++ b/snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -161,7 +161,7 @@ class AdditiveChi2Sampler(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=AdditiveChi2Sampler.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=AdditiveChi2Sampler.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -521,6 +521,22 @@ class AdditiveChi2Sampler(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",