snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (194)
  1. snowflake/ml/_internal/env_utils.py +16 -13
  2. snowflake/ml/_internal/exceptions/modeling_error_messages.py +5 -1
  3. snowflake/ml/_internal/telemetry.py +19 -0
  4. snowflake/ml/feature_store/__init__.py +9 -0
  5. snowflake/ml/feature_store/entity.py +73 -0
  6. snowflake/ml/feature_store/feature_store.py +1657 -0
  7. snowflake/ml/feature_store/feature_view.py +459 -0
  8. snowflake/ml/model/_client/ops/model_ops.py +16 -38
  9. snowflake/ml/model/_client/sql/model.py +1 -7
  10. snowflake/ml/model/_client/sql/model_version.py +20 -15
  11. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +9 -1
  12. snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +12 -2
  14. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +7 -3
  15. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
  16. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
  17. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
  18. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
  19. snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
  20. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
  21. snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
  22. snowflake/ml/model/model_signature.py +72 -16
  23. snowflake/ml/model/type_hints.py +12 -0
  24. snowflake/ml/modeling/_internal/estimator_protocols.py +1 -41
  25. snowflake/ml/modeling/_internal/model_trainer_builder.py +13 -9
  26. snowflake/ml/modeling/_internal/{distributed_hpo_trainer.py → snowpark_implementations/distributed_hpo_trainer.py} +66 -96
  27. snowflake/ml/modeling/_internal/{snowpark_handlers.py → snowpark_implementations/snowpark_handlers.py} +9 -6
  28. snowflake/ml/modeling/_internal/{xgboost_external_memory_trainer.py → snowpark_implementations/xgboost_external_memory_trainer.py} +3 -1
  29. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +19 -3
  30. snowflake/ml/modeling/cluster/affinity_propagation.py +19 -3
  31. snowflake/ml/modeling/cluster/agglomerative_clustering.py +19 -3
  32. snowflake/ml/modeling/cluster/birch.py +19 -3
  33. snowflake/ml/modeling/cluster/bisecting_k_means.py +19 -3
  34. snowflake/ml/modeling/cluster/dbscan.py +19 -3
  35. snowflake/ml/modeling/cluster/feature_agglomeration.py +19 -3
  36. snowflake/ml/modeling/cluster/k_means.py +19 -3
  37. snowflake/ml/modeling/cluster/mean_shift.py +19 -3
  38. snowflake/ml/modeling/cluster/mini_batch_k_means.py +19 -3
  39. snowflake/ml/modeling/cluster/optics.py +19 -3
  40. snowflake/ml/modeling/cluster/spectral_biclustering.py +19 -3
  41. snowflake/ml/modeling/cluster/spectral_clustering.py +19 -3
  42. snowflake/ml/modeling/cluster/spectral_coclustering.py +19 -3
  43. snowflake/ml/modeling/compose/column_transformer.py +19 -3
  44. snowflake/ml/modeling/compose/transformed_target_regressor.py +19 -3
  45. snowflake/ml/modeling/covariance/elliptic_envelope.py +19 -3
  46. snowflake/ml/modeling/covariance/empirical_covariance.py +19 -3
  47. snowflake/ml/modeling/covariance/graphical_lasso.py +19 -3
  48. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +19 -3
  49. snowflake/ml/modeling/covariance/ledoit_wolf.py +19 -3
  50. snowflake/ml/modeling/covariance/min_cov_det.py +19 -3
  51. snowflake/ml/modeling/covariance/oas.py +19 -3
  52. snowflake/ml/modeling/covariance/shrunk_covariance.py +19 -3
  53. snowflake/ml/modeling/decomposition/dictionary_learning.py +19 -3
  54. snowflake/ml/modeling/decomposition/factor_analysis.py +19 -3
  55. snowflake/ml/modeling/decomposition/fast_ica.py +19 -3
  56. snowflake/ml/modeling/decomposition/incremental_pca.py +19 -3
  57. snowflake/ml/modeling/decomposition/kernel_pca.py +19 -3
  58. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +19 -3
  59. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +19 -3
  60. snowflake/ml/modeling/decomposition/pca.py +19 -3
  61. snowflake/ml/modeling/decomposition/sparse_pca.py +19 -3
  62. snowflake/ml/modeling/decomposition/truncated_svd.py +19 -3
  63. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +19 -3
  64. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +19 -3
  65. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +19 -3
  66. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +19 -3
  67. snowflake/ml/modeling/ensemble/bagging_classifier.py +19 -3
  68. snowflake/ml/modeling/ensemble/bagging_regressor.py +19 -3
  69. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +19 -3
  70. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +19 -3
  71. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +19 -3
  72. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +19 -3
  73. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +19 -3
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +19 -3
  75. snowflake/ml/modeling/ensemble/isolation_forest.py +19 -3
  76. snowflake/ml/modeling/ensemble/random_forest_classifier.py +19 -3
  77. snowflake/ml/modeling/ensemble/random_forest_regressor.py +19 -3
  78. snowflake/ml/modeling/ensemble/stacking_regressor.py +19 -3
  79. snowflake/ml/modeling/ensemble/voting_classifier.py +19 -3
  80. snowflake/ml/modeling/ensemble/voting_regressor.py +19 -3
  81. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +19 -3
  82. snowflake/ml/modeling/feature_selection/select_fdr.py +19 -3
  83. snowflake/ml/modeling/feature_selection/select_fpr.py +19 -3
  84. snowflake/ml/modeling/feature_selection/select_fwe.py +19 -3
  85. snowflake/ml/modeling/feature_selection/select_k_best.py +19 -3
  86. snowflake/ml/modeling/feature_selection/select_percentile.py +19 -3
  87. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +19 -3
  88. snowflake/ml/modeling/feature_selection/variance_threshold.py +19 -3
  89. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +19 -3
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +19 -3
  91. snowflake/ml/modeling/impute/iterative_imputer.py +19 -3
  92. snowflake/ml/modeling/impute/knn_imputer.py +19 -3
  93. snowflake/ml/modeling/impute/missing_indicator.py +19 -3
  94. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +19 -3
  95. snowflake/ml/modeling/kernel_approximation/nystroem.py +19 -3
  96. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +19 -3
  97. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +19 -3
  98. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +19 -3
  99. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +19 -3
  100. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +19 -3
  101. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +19 -3
  102. snowflake/ml/modeling/linear_model/ard_regression.py +19 -3
  103. snowflake/ml/modeling/linear_model/bayesian_ridge.py +19 -3
  104. snowflake/ml/modeling/linear_model/elastic_net.py +19 -3
  105. snowflake/ml/modeling/linear_model/elastic_net_cv.py +19 -3
  106. snowflake/ml/modeling/linear_model/gamma_regressor.py +19 -3
  107. snowflake/ml/modeling/linear_model/huber_regressor.py +19 -3
  108. snowflake/ml/modeling/linear_model/lars.py +19 -3
  109. snowflake/ml/modeling/linear_model/lars_cv.py +19 -3
  110. snowflake/ml/modeling/linear_model/lasso.py +19 -3
  111. snowflake/ml/modeling/linear_model/lasso_cv.py +19 -3
  112. snowflake/ml/modeling/linear_model/lasso_lars.py +19 -3
  113. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +19 -3
  114. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +19 -3
  115. snowflake/ml/modeling/linear_model/linear_regression.py +19 -3
  116. snowflake/ml/modeling/linear_model/logistic_regression.py +19 -3
  117. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +19 -3
  118. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +19 -3
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +19 -3
  120. snowflake/ml/modeling/linear_model/multi_task_lasso.py +19 -3
  121. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +19 -3
  122. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +19 -3
  123. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +19 -3
  124. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +19 -3
  125. snowflake/ml/modeling/linear_model/perceptron.py +19 -3
  126. snowflake/ml/modeling/linear_model/poisson_regressor.py +19 -3
  127. snowflake/ml/modeling/linear_model/ransac_regressor.py +19 -3
  128. snowflake/ml/modeling/linear_model/ridge.py +19 -3
  129. snowflake/ml/modeling/linear_model/ridge_classifier.py +19 -3
  130. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +19 -3
  131. snowflake/ml/modeling/linear_model/ridge_cv.py +19 -3
  132. snowflake/ml/modeling/linear_model/sgd_classifier.py +19 -3
  133. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +19 -3
  134. snowflake/ml/modeling/linear_model/sgd_regressor.py +19 -3
  135. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +19 -3
  136. snowflake/ml/modeling/linear_model/tweedie_regressor.py +19 -3
  137. snowflake/ml/modeling/manifold/isomap.py +19 -3
  138. snowflake/ml/modeling/manifold/mds.py +19 -3
  139. snowflake/ml/modeling/manifold/spectral_embedding.py +19 -3
  140. snowflake/ml/modeling/manifold/tsne.py +19 -3
  141. snowflake/ml/modeling/metrics/classification.py +5 -6
  142. snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
  143. snowflake/ml/modeling/metrics/ranking.py +7 -3
  144. snowflake/ml/modeling/metrics/regression.py +6 -3
  145. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +19 -3
  146. snowflake/ml/modeling/mixture/gaussian_mixture.py +19 -3
  147. snowflake/ml/modeling/model_selection/grid_search_cv.py +3 -13
  148. snowflake/ml/modeling/model_selection/randomized_search_cv.py +3 -13
  149. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +19 -3
  150. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +19 -3
  151. snowflake/ml/modeling/multiclass/output_code_classifier.py +19 -3
  152. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +19 -3
  153. snowflake/ml/modeling/naive_bayes/categorical_nb.py +19 -3
  154. snowflake/ml/modeling/naive_bayes/complement_nb.py +19 -3
  155. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +19 -3
  156. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +19 -3
  157. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +19 -3
  158. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +19 -3
  159. snowflake/ml/modeling/neighbors/kernel_density.py +19 -3
  160. snowflake/ml/modeling/neighbors/local_outlier_factor.py +19 -3
  161. snowflake/ml/modeling/neighbors/nearest_centroid.py +19 -3
  162. snowflake/ml/modeling/neighbors/nearest_neighbors.py +19 -3
  163. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +19 -3
  164. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +19 -3
  165. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +19 -3
  166. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +19 -3
  167. snowflake/ml/modeling/neural_network/mlp_classifier.py +19 -3
  168. snowflake/ml/modeling/neural_network/mlp_regressor.py +19 -3
  169. snowflake/ml/modeling/preprocessing/polynomial_features.py +19 -3
  170. snowflake/ml/modeling/semi_supervised/label_propagation.py +19 -3
  171. snowflake/ml/modeling/semi_supervised/label_spreading.py +19 -3
  172. snowflake/ml/modeling/svm/linear_svc.py +19 -3
  173. snowflake/ml/modeling/svm/linear_svr.py +19 -3
  174. snowflake/ml/modeling/svm/nu_svc.py +19 -3
  175. snowflake/ml/modeling/svm/nu_svr.py +19 -3
  176. snowflake/ml/modeling/svm/svc.py +19 -3
  177. snowflake/ml/modeling/svm/svr.py +19 -3
  178. snowflake/ml/modeling/tree/decision_tree_classifier.py +19 -3
  179. snowflake/ml/modeling/tree/decision_tree_regressor.py +19 -3
  180. snowflake/ml/modeling/tree/extra_tree_classifier.py +19 -3
  181. snowflake/ml/modeling/tree/extra_tree_regressor.py +19 -3
  182. snowflake/ml/modeling/xgboost/xgb_classifier.py +19 -3
  183. snowflake/ml/modeling/xgboost/xgb_regressor.py +19 -3
  184. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +19 -3
  185. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +19 -3
  186. snowflake/ml/registry/registry.py +2 -0
  187. snowflake/ml/version.py +1 -1
  188. snowflake_ml_python-1.2.2.dist-info/LICENSE.txt +202 -0
  189. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/METADATA +276 -50
  190. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/RECORD +204 -197
  191. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/WHEEL +2 -1
  192. snowflake_ml_python-1.2.2.dist-info/top_level.txt +1 -0
  193. /snowflake/ml/modeling/_internal/{pandas_trainer.py → local_implementations/pandas_trainer.py} +0 -0
  194. /snowflake/ml/modeling/_internal/{snowpark_trainer.py → snowpark_implementations/snowpark_trainer.py} +0 -0
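Files 4-7 above introduce a new snowflake.ml.feature_store package (Entity, FeatureView, and the FeatureStore client). A minimal sketch of how these pieces might fit together, assuming the class names exported by feature_store/__init__.py; the constructor parameters and method names below are assumptions inferred from the file names, not confirmed signatures from this release.

# Hypothetical sketch of the new feature_store package added in 1.2.2
# (entity.py, feature_view.py, feature_store.py). Class names come from the
# file list above; parameters are assumptions, not confirmed API.
from snowflake.snowpark import Session
from snowflake.ml.feature_store import Entity, FeatureStore, FeatureView

# Placeholder connection parameters; fill in real credentials.
session = Session.builder.configs(
    {"account": "...", "user": "...", "password": "..."}
).create()

fs = FeatureStore(  # assumed signature
    session=session,
    database="ML_DB",
    name="MY_FEATURE_STORE",
    default_warehouse="ML_WH",
)

# An entity names the join keys that feature views are keyed on.
customer = Entity(name="CUSTOMER", join_keys=["CUSTOMER_ID"])  # assumed
fs.register_entity(customer)

# A feature view wraps a Snowpark DataFrame of per-entity features.
fv = FeatureView(  # assumed
    name="CUSTOMER_ORDER_COUNTS",
    entities=[customer],
    feature_df=session.table("RAW_ORDERS").group_by("CUSTOMER_ID").count(),
)
fs.register_feature_view(feature_view=fv, version="V1")  # assumed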
--- a/snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py
+++ b/snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -255,7 +255,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=MultiTaskElasticNetCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MultiTaskElasticNetCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -615,6 +615,22 @@ class MultiTaskElasticNetCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
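The sixteen lines added in the final hunk (repeated verbatim in each of the eleven sibling diffs below) implement a fallback for the output column type: when the factory left expected_dtype empty, clustering and decomposition transformers whose output-column count differs from n_clusters or n_components fall back to ARRAY, and otherwise the input type is reused only when all inputs share one type and the column counts match. A standalone sketch of that decision logic, with plain strings standing in for the Snowpark type objects and the caller supplying what _infer_signature plus convert_sp_to_sf_type would produce:

# Standalone sketch of the expected-dtype fallback added in these diffs.
# input_types stands in for the converted results of _infer_signature(...);
# this is an illustration of the logic, not the library's internal function.
from typing import Any, List

def resolve_expected_dtype(sklearn_obj: Any, output_cols: List[str], input_types: List[str]) -> str:
    expected_dtype = ""
    # Clustering: if the output-column count differs from n_clusters, each
    # row holds a list of values, so the Snowflake column type must be ARRAY.
    if hasattr(sklearn_obj, "n_clusters") and sklearn_obj.n_clusters != len(output_cols):
        expected_dtype = "ARRAY"
    # Decomposition: same reasoning, keyed on n_components.
    elif hasattr(sklearn_obj, "n_components") and sklearn_obj.n_components != len(output_cols):
        expected_dtype = "ARRAY"
    else:
        # Reusing the input type is only safe when every input shares one
        # type and the transform maps columns one-to-one onto the outputs.
        if input_types and all(t == input_types[0] for t in input_types) and len(input_types) == len(output_cols):
            expected_dtype = input_types[0]
    return expected_dtype

# Example: a PCA-like object with n_components=2 and two output columns keeps
# the shared input type; with three output columns the mismatch forces ARRAY.
class FakePCA:
    n_components = 2

print(resolve_expected_dtype(FakePCA(), ["PC1", "PC2"], ["DOUBLE", "DOUBLE"]))          # DOUBLE
print(resolve_expected_dtype(FakePCA(), ["PC1", "PC2", "PC3"], ["DOUBLE", "DOUBLE"]))   # ARRAY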
--- a/snowflake/ml/modeling/linear_model/multi_task_lasso.py
+++ b/snowflake/ml/modeling/linear_model/multi_task_lasso.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -206,7 +206,7 @@ class MultiTaskLasso(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=MultiTaskLasso.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MultiTaskLasso.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -566,6 +566,22 @@ class MultiTaskLasso(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py
+++ b/snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -241,7 +241,7 @@ class MultiTaskLassoCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=MultiTaskLassoCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MultiTaskLassoCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -601,6 +601,22 @@ class MultiTaskLassoCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py
+++ b/snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -189,7 +189,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=OrthogonalMatchingPursuit.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=OrthogonalMatchingPursuit.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -549,6 +549,22 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py
+++ b/snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -263,7 +263,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=PassiveAggressiveClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=PassiveAggressiveClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -623,6 +623,22 @@ class PassiveAggressiveClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py
+++ b/snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -249,7 +249,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=PassiveAggressiveRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=PassiveAggressiveRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -609,6 +609,22 @@ class PassiveAggressiveRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/perceptron.py
+++ b/snowflake/ml/modeling/linear_model/perceptron.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -262,7 +262,7 @@ class Perceptron(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=Perceptron.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=Perceptron.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -622,6 +622,22 @@ class Perceptron(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/poisson_regressor.py
+++ b/snowflake/ml/modeling/linear_model/poisson_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -211,7 +211,7 @@ class PoissonRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=PoissonRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=PoissonRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -571,6 +571,22 @@ class PoissonRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/ransac_regressor.py
+++ b/snowflake/ml/modeling/linear_model/ransac_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -267,7 +267,7 @@ class RANSACRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=RANSACRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=RANSACRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -627,6 +627,22 @@ class RANSACRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/ridge.py
+++ b/snowflake/ml/modeling/linear_model/ridge.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -259,7 +259,7 @@ class Ridge(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=Ridge.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=Ridge.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -619,6 +619,22 @@ class Ridge(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/ridge_classifier.py
+++ b/snowflake/ml/modeling/linear_model/ridge_classifier.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -259,7 +259,7 @@ class RidgeClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=RidgeClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=RidgeClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -619,6 +619,22 @@ class RidgeClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/ridge_classifier_cv.py
+++ b/snowflake/ml/modeling/linear_model/ridge_classifier_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -210,7 +210,7 @@ class RidgeClassifierCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=RidgeClassifierCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=RidgeClassifierCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
        self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -570,6 +570,22 @@ class RidgeClassifierCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
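None of these changes alter the public estimator surface; the import-path moves into snowpark_implementations and the FitPredictHandlers → TransformerHandlers rename are internal. A hedged usage sketch against one of the affected classes, with the connection parameters, table, and column names as placeholder assumptions:

# Hedged usage sketch: calling code is unaffected by the internal renames.
# Table and column names here are illustrative assumptions.
from snowflake.snowpark import Session
from snowflake.ml.modeling.linear_model import Ridge

session = Session.builder.configs(
    {"account": "...", "user": "...", "password": "..."}
).create()
train_df = session.table("TRAINING_DATA")  # assumed table

model = Ridge(
    input_cols=["FEATURE_1", "FEATURE_2"],
    label_cols=["TARGET"],
    output_cols=["TARGET_PREDICTION"],
)
model.fit(train_df)                     # trains inside Snowflake
predictions = model.predict(train_df)   # batch inference via _batch_inference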