snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (194)
  1. snowflake/ml/_internal/env_utils.py +16 -13
  2. snowflake/ml/_internal/exceptions/modeling_error_messages.py +5 -1
  3. snowflake/ml/_internal/telemetry.py +19 -0
  4. snowflake/ml/feature_store/__init__.py +9 -0
  5. snowflake/ml/feature_store/entity.py +73 -0
  6. snowflake/ml/feature_store/feature_store.py +1657 -0
  7. snowflake/ml/feature_store/feature_view.py +459 -0
  8. snowflake/ml/model/_client/ops/model_ops.py +16 -38
  9. snowflake/ml/model/_client/sql/model.py +1 -7
  10. snowflake/ml/model/_client/sql/model_version.py +20 -15
  11. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +9 -1
  12. snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +12 -2
  14. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +7 -3
  15. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
  16. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
  17. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
  18. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
  19. snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
  20. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
  21. snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
  22. snowflake/ml/model/model_signature.py +72 -16
  23. snowflake/ml/model/type_hints.py +12 -0
  24. snowflake/ml/modeling/_internal/estimator_protocols.py +1 -41
  25. snowflake/ml/modeling/_internal/model_trainer_builder.py +13 -9
  26. snowflake/ml/modeling/_internal/{distributed_hpo_trainer.py → snowpark_implementations/distributed_hpo_trainer.py} +66 -96
  27. snowflake/ml/modeling/_internal/{snowpark_handlers.py → snowpark_implementations/snowpark_handlers.py} +9 -6
  28. snowflake/ml/modeling/_internal/{xgboost_external_memory_trainer.py → snowpark_implementations/xgboost_external_memory_trainer.py} +3 -1
  29. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +19 -3
  30. snowflake/ml/modeling/cluster/affinity_propagation.py +19 -3
  31. snowflake/ml/modeling/cluster/agglomerative_clustering.py +19 -3
  32. snowflake/ml/modeling/cluster/birch.py +19 -3
  33. snowflake/ml/modeling/cluster/bisecting_k_means.py +19 -3
  34. snowflake/ml/modeling/cluster/dbscan.py +19 -3
  35. snowflake/ml/modeling/cluster/feature_agglomeration.py +19 -3
  36. snowflake/ml/modeling/cluster/k_means.py +19 -3
  37. snowflake/ml/modeling/cluster/mean_shift.py +19 -3
  38. snowflake/ml/modeling/cluster/mini_batch_k_means.py +19 -3
  39. snowflake/ml/modeling/cluster/optics.py +19 -3
  40. snowflake/ml/modeling/cluster/spectral_biclustering.py +19 -3
  41. snowflake/ml/modeling/cluster/spectral_clustering.py +19 -3
  42. snowflake/ml/modeling/cluster/spectral_coclustering.py +19 -3
  43. snowflake/ml/modeling/compose/column_transformer.py +19 -3
  44. snowflake/ml/modeling/compose/transformed_target_regressor.py +19 -3
  45. snowflake/ml/modeling/covariance/elliptic_envelope.py +19 -3
  46. snowflake/ml/modeling/covariance/empirical_covariance.py +19 -3
  47. snowflake/ml/modeling/covariance/graphical_lasso.py +19 -3
  48. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +19 -3
  49. snowflake/ml/modeling/covariance/ledoit_wolf.py +19 -3
  50. snowflake/ml/modeling/covariance/min_cov_det.py +19 -3
  51. snowflake/ml/modeling/covariance/oas.py +19 -3
  52. snowflake/ml/modeling/covariance/shrunk_covariance.py +19 -3
  53. snowflake/ml/modeling/decomposition/dictionary_learning.py +19 -3
  54. snowflake/ml/modeling/decomposition/factor_analysis.py +19 -3
  55. snowflake/ml/modeling/decomposition/fast_ica.py +19 -3
  56. snowflake/ml/modeling/decomposition/incremental_pca.py +19 -3
  57. snowflake/ml/modeling/decomposition/kernel_pca.py +19 -3
  58. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +19 -3
  59. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +19 -3
  60. snowflake/ml/modeling/decomposition/pca.py +19 -3
  61. snowflake/ml/modeling/decomposition/sparse_pca.py +19 -3
  62. snowflake/ml/modeling/decomposition/truncated_svd.py +19 -3
  63. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +19 -3
  64. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +19 -3
  65. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +19 -3
  66. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +19 -3
  67. snowflake/ml/modeling/ensemble/bagging_classifier.py +19 -3
  68. snowflake/ml/modeling/ensemble/bagging_regressor.py +19 -3
  69. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +19 -3
  70. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +19 -3
  71. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +19 -3
  72. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +19 -3
  73. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +19 -3
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +19 -3
  75. snowflake/ml/modeling/ensemble/isolation_forest.py +19 -3
  76. snowflake/ml/modeling/ensemble/random_forest_classifier.py +19 -3
  77. snowflake/ml/modeling/ensemble/random_forest_regressor.py +19 -3
  78. snowflake/ml/modeling/ensemble/stacking_regressor.py +19 -3
  79. snowflake/ml/modeling/ensemble/voting_classifier.py +19 -3
  80. snowflake/ml/modeling/ensemble/voting_regressor.py +19 -3
  81. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +19 -3
  82. snowflake/ml/modeling/feature_selection/select_fdr.py +19 -3
  83. snowflake/ml/modeling/feature_selection/select_fpr.py +19 -3
  84. snowflake/ml/modeling/feature_selection/select_fwe.py +19 -3
  85. snowflake/ml/modeling/feature_selection/select_k_best.py +19 -3
  86. snowflake/ml/modeling/feature_selection/select_percentile.py +19 -3
  87. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +19 -3
  88. snowflake/ml/modeling/feature_selection/variance_threshold.py +19 -3
  89. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +19 -3
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +19 -3
  91. snowflake/ml/modeling/impute/iterative_imputer.py +19 -3
  92. snowflake/ml/modeling/impute/knn_imputer.py +19 -3
  93. snowflake/ml/modeling/impute/missing_indicator.py +19 -3
  94. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +19 -3
  95. snowflake/ml/modeling/kernel_approximation/nystroem.py +19 -3
  96. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +19 -3
  97. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +19 -3
  98. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +19 -3
  99. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +19 -3
  100. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +19 -3
  101. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +19 -3
  102. snowflake/ml/modeling/linear_model/ard_regression.py +19 -3
  103. snowflake/ml/modeling/linear_model/bayesian_ridge.py +19 -3
  104. snowflake/ml/modeling/linear_model/elastic_net.py +19 -3
  105. snowflake/ml/modeling/linear_model/elastic_net_cv.py +19 -3
  106. snowflake/ml/modeling/linear_model/gamma_regressor.py +19 -3
  107. snowflake/ml/modeling/linear_model/huber_regressor.py +19 -3
  108. snowflake/ml/modeling/linear_model/lars.py +19 -3
  109. snowflake/ml/modeling/linear_model/lars_cv.py +19 -3
  110. snowflake/ml/modeling/linear_model/lasso.py +19 -3
  111. snowflake/ml/modeling/linear_model/lasso_cv.py +19 -3
  112. snowflake/ml/modeling/linear_model/lasso_lars.py +19 -3
  113. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +19 -3
  114. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +19 -3
  115. snowflake/ml/modeling/linear_model/linear_regression.py +19 -3
  116. snowflake/ml/modeling/linear_model/logistic_regression.py +19 -3
  117. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +19 -3
  118. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +19 -3
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +19 -3
  120. snowflake/ml/modeling/linear_model/multi_task_lasso.py +19 -3
  121. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +19 -3
  122. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +19 -3
  123. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +19 -3
  124. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +19 -3
  125. snowflake/ml/modeling/linear_model/perceptron.py +19 -3
  126. snowflake/ml/modeling/linear_model/poisson_regressor.py +19 -3
  127. snowflake/ml/modeling/linear_model/ransac_regressor.py +19 -3
  128. snowflake/ml/modeling/linear_model/ridge.py +19 -3
  129. snowflake/ml/modeling/linear_model/ridge_classifier.py +19 -3
  130. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +19 -3
  131. snowflake/ml/modeling/linear_model/ridge_cv.py +19 -3
  132. snowflake/ml/modeling/linear_model/sgd_classifier.py +19 -3
  133. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +19 -3
  134. snowflake/ml/modeling/linear_model/sgd_regressor.py +19 -3
  135. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +19 -3
  136. snowflake/ml/modeling/linear_model/tweedie_regressor.py +19 -3
  137. snowflake/ml/modeling/manifold/isomap.py +19 -3
  138. snowflake/ml/modeling/manifold/mds.py +19 -3
  139. snowflake/ml/modeling/manifold/spectral_embedding.py +19 -3
  140. snowflake/ml/modeling/manifold/tsne.py +19 -3
  141. snowflake/ml/modeling/metrics/classification.py +5 -6
  142. snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
  143. snowflake/ml/modeling/metrics/ranking.py +7 -3
  144. snowflake/ml/modeling/metrics/regression.py +6 -3
  145. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +19 -3
  146. snowflake/ml/modeling/mixture/gaussian_mixture.py +19 -3
  147. snowflake/ml/modeling/model_selection/grid_search_cv.py +3 -13
  148. snowflake/ml/modeling/model_selection/randomized_search_cv.py +3 -13
  149. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +19 -3
  150. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +19 -3
  151. snowflake/ml/modeling/multiclass/output_code_classifier.py +19 -3
  152. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +19 -3
  153. snowflake/ml/modeling/naive_bayes/categorical_nb.py +19 -3
  154. snowflake/ml/modeling/naive_bayes/complement_nb.py +19 -3
  155. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +19 -3
  156. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +19 -3
  157. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +19 -3
  158. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +19 -3
  159. snowflake/ml/modeling/neighbors/kernel_density.py +19 -3
  160. snowflake/ml/modeling/neighbors/local_outlier_factor.py +19 -3
  161. snowflake/ml/modeling/neighbors/nearest_centroid.py +19 -3
  162. snowflake/ml/modeling/neighbors/nearest_neighbors.py +19 -3
  163. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +19 -3
  164. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +19 -3
  165. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +19 -3
  166. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +19 -3
  167. snowflake/ml/modeling/neural_network/mlp_classifier.py +19 -3
  168. snowflake/ml/modeling/neural_network/mlp_regressor.py +19 -3
  169. snowflake/ml/modeling/preprocessing/polynomial_features.py +19 -3
  170. snowflake/ml/modeling/semi_supervised/label_propagation.py +19 -3
  171. snowflake/ml/modeling/semi_supervised/label_spreading.py +19 -3
  172. snowflake/ml/modeling/svm/linear_svc.py +19 -3
  173. snowflake/ml/modeling/svm/linear_svr.py +19 -3
  174. snowflake/ml/modeling/svm/nu_svc.py +19 -3
  175. snowflake/ml/modeling/svm/nu_svr.py +19 -3
  176. snowflake/ml/modeling/svm/svc.py +19 -3
  177. snowflake/ml/modeling/svm/svr.py +19 -3
  178. snowflake/ml/modeling/tree/decision_tree_classifier.py +19 -3
  179. snowflake/ml/modeling/tree/decision_tree_regressor.py +19 -3
  180. snowflake/ml/modeling/tree/extra_tree_classifier.py +19 -3
  181. snowflake/ml/modeling/tree/extra_tree_regressor.py +19 -3
  182. snowflake/ml/modeling/xgboost/xgb_classifier.py +19 -3
  183. snowflake/ml/modeling/xgboost/xgb_regressor.py +19 -3
  184. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +19 -3
  185. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +19 -3
  186. snowflake/ml/registry/registry.py +2 -0
  187. snowflake/ml/version.py +1 -1
  188. snowflake_ml_python-1.2.2.dist-info/LICENSE.txt +202 -0
  189. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/METADATA +276 -50
  190. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/RECORD +204 -197
  191. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/WHEEL +2 -1
  192. snowflake_ml_python-1.2.2.dist-info/top_level.txt +1 -0
  193. /snowflake/ml/modeling/_internal/{pandas_trainer.py → local_implementations/pandas_trainer.py} +0 -0
  194. /snowflake/ml/modeling/_internal/{snowpark_trainer.py → snowpark_implementations/snowpark_trainer.py} +0 -0
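The headline changes are the new snowflake.ml.feature_store package (items 4–7) and the move of the internal trainer and handler modules into local_implementations and snowpark_implementations subpackages (items 26–28, 193–194); the per-estimator hunks below are the import-path fallout of that move. As a rough sketch of how the new feature store package is used, based on Snowflake's public feature store documentation (the exact 1.2.2 signatures are an assumption, not taken from this diff):

# Hedged sketch of the feature store API introduced in 1.2.2; entity/view
# names, the source table, and connection parameters are all hypothetical.
from snowflake.snowpark import Session
from snowflake.ml.feature_store import Entity, FeatureStore, FeatureView

# Fill in your own connection parameters.
session = Session.builder.configs({"account": "...", "user": "...", "password": "..."}).create()

fs = FeatureStore(
    session=session,
    database="ML_DB",          # database holding the feature store
    name="FS_SCHEMA",          # schema used as the feature store
    default_warehouse="ML_WH",
)

customer = Entity(name="CUSTOMER", join_keys=["CUSTOMER_ID"])
fs.register_entity(customer)

features_df = session.table("ML_DB.RAW.CUSTOMER_METRICS")  # hypothetical feature source
fv = FeatureView(
    name="CUSTOMER_FEATURES",
    entities=[customer],
    feature_df=features_df,
    refresh_freq="1 day",      # materialize with periodic refresh
)
fs.register_feature_view(feature_view=fv, version="V1")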
snowflake/ml/modeling/linear_model/huber_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -194,7 +194,7 @@ class HuberRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=HuberRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=HuberRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -554,6 +554,22 @@ class HuberRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
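These four hunks repeat, with only the class name and line offsets changing, in each of the estimator diffs that follow. Restated outside the generated class, the new expected_dtype fallback reads roughly as follows (the function name and plain-string types are hypothetical simplifications; the real code works on Snowpark signature objects and converts them with convert_sp_to_sf_type):

from typing import Any, List, Sequence


def infer_expected_dtype(sklearn_obj: Any, output_cols: Sequence[str], output_types: List[str]) -> str:
    """Hypothetical standalone restatement of the fallback added in 1.2.2."""
    # Clustering transformers emit one column per cluster; a count mismatch
    # means each row holds a list of values, i.e. an ARRAY column.
    if hasattr(sklearn_obj, "n_clusters") and sklearn_obj.n_clusters != len(output_cols):
        return "ARRAY"
    # Decomposition transformers emit one column per component; same reasoning.
    if hasattr(sklearn_obj, "n_components") and sklearn_obj.n_components != len(output_cols):
        return "ARRAY"
    # Copy the input type through only when all inputs share one type and
    # there is exactly one output column per input column.
    if output_types and all(t == output_types[0] for t in output_types) and len(output_types) == len(output_cols):
        return output_types[0]
    return ""  # unresolved: caller keeps its variant/ARRAY handling


# Example: a 2-component decomposition writing to a single output column falls back to ARRAY.
class _FakePCA:
    n_components = 2

assert infer_expected_dtype(_FakePCA(), ["OUTPUT_1"], ["FloatType()", "FloatType()"]) == "ARRAY"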
snowflake/ml/modeling/linear_model/lars.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -223,7 +223,7 @@ class Lars(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=Lars.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=Lars.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -583,6 +583,22 @@ class Lars(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/lars_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -231,7 +231,7 @@ class LarsCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LarsCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LarsCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -591,6 +591,22 @@ class LarsCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/lasso.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -224,7 +224,7 @@ class Lasso(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=Lasso.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=Lasso.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -584,6 +584,22 @@ class Lasso(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/lasso_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -252,7 +252,7 @@ class LassoCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LassoCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LassoCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -612,6 +612,22 @@ class LassoCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/lasso_lars.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -244,7 +244,7 @@ class LassoLars(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LassoLars.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LassoLars.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -604,6 +604,22 @@ class LassoLars(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/lasso_lars_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -245,7 +245,7 @@ class LassoLarsCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LassoLarsCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LassoLarsCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -605,6 +605,22 @@ class LassoLarsCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/lasso_lars_ic.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -228,7 +228,7 @@ class LassoLarsIC(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LassoLarsIC.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LassoLarsIC.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -588,6 +588,22 @@ class LassoLarsIC(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/linear_regression.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -181,7 +181,7 @@ class LinearRegression(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LinearRegression.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LinearRegression.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -541,6 +541,22 @@ class LinearRegression(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/logistic_regression.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -295,7 +295,7 @@ class LogisticRegression(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LogisticRegression.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LogisticRegression.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -655,6 +655,22 @@ class LogisticRegression(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/logistic_regression_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -316,7 +316,7 @@ class LogisticRegressionCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LogisticRegressionCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LogisticRegressionCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -676,6 +676,22 @@ class LogisticRegressionCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -214,7 +214,7 @@ class MultiTaskElasticNet(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=MultiTaskElasticNet.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MultiTaskElasticNet.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -574,6 +574,22 @@ class MultiTaskElasticNet(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
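To install the newer of the two versions compared in this diff:

pip install --upgrade snowflake-ml-python==1.2.2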