snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
Files changed (176)
  1. snowflake/ml/_internal/telemetry.py +19 -0
  2. snowflake/ml/model/_client/ops/model_ops.py +16 -38
  3. snowflake/ml/model/_client/sql/model.py +1 -7
  4. snowflake/ml/model/_client/sql/model_version.py +20 -15
  5. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
  6. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
  7. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
  8. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
  9. snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
  10. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
  11. snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
  12. snowflake/ml/model/type_hints.py +3 -0
  13. snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +63 -95
  14. snowflake/ml/modeling/_internal/snowpark_handlers.py +9 -6
  15. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +16 -0
  16. snowflake/ml/modeling/cluster/affinity_propagation.py +16 -0
  17. snowflake/ml/modeling/cluster/agglomerative_clustering.py +16 -0
  18. snowflake/ml/modeling/cluster/birch.py +16 -0
  19. snowflake/ml/modeling/cluster/bisecting_k_means.py +16 -0
  20. snowflake/ml/modeling/cluster/dbscan.py +16 -0
  21. snowflake/ml/modeling/cluster/feature_agglomeration.py +16 -0
  22. snowflake/ml/modeling/cluster/k_means.py +16 -0
  23. snowflake/ml/modeling/cluster/mean_shift.py +16 -0
  24. snowflake/ml/modeling/cluster/mini_batch_k_means.py +16 -0
  25. snowflake/ml/modeling/cluster/optics.py +16 -0
  26. snowflake/ml/modeling/cluster/spectral_biclustering.py +16 -0
  27. snowflake/ml/modeling/cluster/spectral_clustering.py +16 -0
  28. snowflake/ml/modeling/cluster/spectral_coclustering.py +16 -0
  29. snowflake/ml/modeling/compose/column_transformer.py +16 -0
  30. snowflake/ml/modeling/compose/transformed_target_regressor.py +16 -0
  31. snowflake/ml/modeling/covariance/elliptic_envelope.py +16 -0
  32. snowflake/ml/modeling/covariance/empirical_covariance.py +16 -0
  33. snowflake/ml/modeling/covariance/graphical_lasso.py +16 -0
  34. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +16 -0
  35. snowflake/ml/modeling/covariance/ledoit_wolf.py +16 -0
  36. snowflake/ml/modeling/covariance/min_cov_det.py +16 -0
  37. snowflake/ml/modeling/covariance/oas.py +16 -0
  38. snowflake/ml/modeling/covariance/shrunk_covariance.py +16 -0
  39. snowflake/ml/modeling/decomposition/dictionary_learning.py +16 -0
  40. snowflake/ml/modeling/decomposition/factor_analysis.py +16 -0
  41. snowflake/ml/modeling/decomposition/fast_ica.py +16 -0
  42. snowflake/ml/modeling/decomposition/incremental_pca.py +16 -0
  43. snowflake/ml/modeling/decomposition/kernel_pca.py +16 -0
  44. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +16 -0
  45. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +16 -0
  46. snowflake/ml/modeling/decomposition/pca.py +16 -0
  47. snowflake/ml/modeling/decomposition/sparse_pca.py +16 -0
  48. snowflake/ml/modeling/decomposition/truncated_svd.py +16 -0
  49. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +16 -0
  50. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +16 -0
  51. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +16 -0
  52. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +16 -0
  53. snowflake/ml/modeling/ensemble/bagging_classifier.py +16 -0
  54. snowflake/ml/modeling/ensemble/bagging_regressor.py +16 -0
  55. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +16 -0
  56. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +16 -0
  57. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +16 -0
  58. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +16 -0
  59. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +16 -0
  60. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +16 -0
  61. snowflake/ml/modeling/ensemble/isolation_forest.py +16 -0
  62. snowflake/ml/modeling/ensemble/random_forest_classifier.py +16 -0
  63. snowflake/ml/modeling/ensemble/random_forest_regressor.py +16 -0
  64. snowflake/ml/modeling/ensemble/stacking_regressor.py +16 -0
  65. snowflake/ml/modeling/ensemble/voting_classifier.py +16 -0
  66. snowflake/ml/modeling/ensemble/voting_regressor.py +16 -0
  67. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +16 -0
  68. snowflake/ml/modeling/feature_selection/select_fdr.py +16 -0
  69. snowflake/ml/modeling/feature_selection/select_fpr.py +16 -0
  70. snowflake/ml/modeling/feature_selection/select_fwe.py +16 -0
  71. snowflake/ml/modeling/feature_selection/select_k_best.py +16 -0
  72. snowflake/ml/modeling/feature_selection/select_percentile.py +16 -0
  73. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +16 -0
  74. snowflake/ml/modeling/feature_selection/variance_threshold.py +16 -0
  75. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +16 -0
  76. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +16 -0
  77. snowflake/ml/modeling/impute/iterative_imputer.py +16 -0
  78. snowflake/ml/modeling/impute/knn_imputer.py +16 -0
  79. snowflake/ml/modeling/impute/missing_indicator.py +16 -0
  80. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +16 -0
  81. snowflake/ml/modeling/kernel_approximation/nystroem.py +16 -0
  82. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +16 -0
  83. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +16 -0
  84. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +16 -0
  85. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +16 -0
  86. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +16 -0
  87. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +16 -0
  88. snowflake/ml/modeling/linear_model/ard_regression.py +16 -0
  89. snowflake/ml/modeling/linear_model/bayesian_ridge.py +16 -0
  90. snowflake/ml/modeling/linear_model/elastic_net.py +16 -0
  91. snowflake/ml/modeling/linear_model/elastic_net_cv.py +16 -0
  92. snowflake/ml/modeling/linear_model/gamma_regressor.py +16 -0
  93. snowflake/ml/modeling/linear_model/huber_regressor.py +16 -0
  94. snowflake/ml/modeling/linear_model/lars.py +16 -0
  95. snowflake/ml/modeling/linear_model/lars_cv.py +16 -0
  96. snowflake/ml/modeling/linear_model/lasso.py +16 -0
  97. snowflake/ml/modeling/linear_model/lasso_cv.py +16 -0
  98. snowflake/ml/modeling/linear_model/lasso_lars.py +16 -0
  99. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +16 -0
  100. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +16 -0
  101. snowflake/ml/modeling/linear_model/linear_regression.py +16 -0
  102. snowflake/ml/modeling/linear_model/logistic_regression.py +16 -0
  103. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +16 -0
  104. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +16 -0
  105. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +16 -0
  106. snowflake/ml/modeling/linear_model/multi_task_lasso.py +16 -0
  107. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +16 -0
  108. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +16 -0
  109. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +16 -0
  110. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +16 -0
  111. snowflake/ml/modeling/linear_model/perceptron.py +16 -0
  112. snowflake/ml/modeling/linear_model/poisson_regressor.py +16 -0
  113. snowflake/ml/modeling/linear_model/ransac_regressor.py +16 -0
  114. snowflake/ml/modeling/linear_model/ridge.py +16 -0
  115. snowflake/ml/modeling/linear_model/ridge_classifier.py +16 -0
  116. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +16 -0
  117. snowflake/ml/modeling/linear_model/ridge_cv.py +16 -0
  118. snowflake/ml/modeling/linear_model/sgd_classifier.py +16 -0
  119. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +16 -0
  120. snowflake/ml/modeling/linear_model/sgd_regressor.py +16 -0
  121. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +16 -0
  122. snowflake/ml/modeling/linear_model/tweedie_regressor.py +16 -0
  123. snowflake/ml/modeling/manifold/isomap.py +16 -0
  124. snowflake/ml/modeling/manifold/mds.py +16 -0
  125. snowflake/ml/modeling/manifold/spectral_embedding.py +16 -0
  126. snowflake/ml/modeling/manifold/tsne.py +16 -0
  127. snowflake/ml/modeling/metrics/classification.py +5 -6
  128. snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
  129. snowflake/ml/modeling/metrics/ranking.py +7 -3
  130. snowflake/ml/modeling/metrics/regression.py +6 -3
  131. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +16 -0
  132. snowflake/ml/modeling/mixture/gaussian_mixture.py +16 -0
  133. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +16 -0
  134. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +16 -0
  135. snowflake/ml/modeling/multiclass/output_code_classifier.py +16 -0
  136. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +16 -0
  137. snowflake/ml/modeling/naive_bayes/categorical_nb.py +16 -0
  138. snowflake/ml/modeling/naive_bayes/complement_nb.py +16 -0
  139. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +16 -0
  140. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +16 -0
  141. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +16 -0
  142. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +16 -0
  143. snowflake/ml/modeling/neighbors/kernel_density.py +16 -0
  144. snowflake/ml/modeling/neighbors/local_outlier_factor.py +16 -0
  145. snowflake/ml/modeling/neighbors/nearest_centroid.py +16 -0
  146. snowflake/ml/modeling/neighbors/nearest_neighbors.py +16 -0
  147. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +16 -0
  148. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +16 -0
  149. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +16 -0
  150. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +16 -0
  151. snowflake/ml/modeling/neural_network/mlp_classifier.py +16 -0
  152. snowflake/ml/modeling/neural_network/mlp_regressor.py +16 -0
  153. snowflake/ml/modeling/preprocessing/polynomial_features.py +16 -0
  154. snowflake/ml/modeling/semi_supervised/label_propagation.py +16 -0
  155. snowflake/ml/modeling/semi_supervised/label_spreading.py +16 -0
  156. snowflake/ml/modeling/svm/linear_svc.py +16 -0
  157. snowflake/ml/modeling/svm/linear_svr.py +16 -0
  158. snowflake/ml/modeling/svm/nu_svc.py +16 -0
  159. snowflake/ml/modeling/svm/nu_svr.py +16 -0
  160. snowflake/ml/modeling/svm/svc.py +16 -0
  161. snowflake/ml/modeling/svm/svr.py +16 -0
  162. snowflake/ml/modeling/tree/decision_tree_classifier.py +16 -0
  163. snowflake/ml/modeling/tree/decision_tree_regressor.py +16 -0
  164. snowflake/ml/modeling/tree/extra_tree_classifier.py +16 -0
  165. snowflake/ml/modeling/tree/extra_tree_regressor.py +16 -0
  166. snowflake/ml/modeling/xgboost/xgb_classifier.py +16 -0
  167. snowflake/ml/modeling/xgboost/xgb_regressor.py +16 -0
  168. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +16 -0
  169. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +16 -0
  170. snowflake/ml/registry/registry.py +2 -0
  171. snowflake/ml/version.py +1 -1
  172. snowflake_ml_python-1.2.1.dist-info/LICENSE.txt +202 -0
  173. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/METADATA +261 -50
  174. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/RECORD +189 -186
  175. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/WHEEL +2 -1
  176. snowflake_ml_python-1.2.1.dist-info/top_level.txt +1 -0
snowflake/ml/modeling/ensemble/isolation_forest.py
@@ -585,6 +585,22 @@ class IsolationForest(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
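
The same sixteen-line addition recurs in each generated estimator class below. The following standalone sketch shows the decision it encodes; infer_transform_dtype and the SimpleNamespace stand-ins are hypothetical illustrations, not package code, and the real implementation defers to the internal _infer_signature and convert_sp_to_sf_type helpers.

    from types import SimpleNamespace

    def infer_transform_dtype(sklearn_object, output_cols, input_col_types):
        # Clustering transformers: when the cluster count differs from the
        # number of output columns, the result must be packed into an ARRAY.
        if hasattr(sklearn_object, "n_clusters") and sklearn_object.n_clusters != len(output_cols):
            return "ARRAY"
        # Decomposition transformers: the same rule, keyed on n_components.
        if hasattr(sklearn_object, "n_components") and sklearn_object.n_components != len(output_cols):
            return "ARRAY"
        # Propagate the input type only if all inputs share one type and the
        # input/output column counts match; otherwise leave it unresolved.
        if input_col_types and all(t == input_col_types[0] for t in input_col_types) and len(input_col_types) == len(output_cols):
            return input_col_types[0]
        return ""  # unresolved; the caller keeps its generic variant handling

    print(infer_transform_dtype(SimpleNamespace(n_clusters=8), ["CLUSTER"], ["FLOAT"]))                # ARRAY
    print(infer_transform_dtype(SimpleNamespace(n_components=2), ["PC1", "PC2"], ["FLOAT", "FLOAT"]))  # FLOAT

With eight clusters and a single output column, the first call falls back to ARRAY; the PCA-like case propagates the shared FLOAT input type.
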
snowflake/ml/modeling/ensemble/random_forest_classifier.py
@@ -697,6 +697,22 @@ class RandomForestClassifier(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/ensemble/random_forest_regressor.py
@@ -676,6 +676,22 @@ class RandomForestRegressor(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/ensemble/stacking_regressor.py
@@ -579,6 +579,22 @@ class StackingRegressor(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/ensemble/voting_classifier.py
@@ -561,6 +561,22 @@ class VotingClassifier(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/ensemble/voting_regressor.py
@@ -543,6 +543,22 @@ class VotingRegressor(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/feature_selection/generic_univariate_select.py
@@ -531,6 +531,22 @@ class GenericUnivariateSelect(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/feature_selection/select_fdr.py
@@ -527,6 +527,22 @@ class SelectFdr(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/feature_selection/select_fpr.py
@@ -527,6 +527,22 @@ class SelectFpr(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/feature_selection/select_fwe.py
@@ -527,6 +527,22 @@ class SelectFwe(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/feature_selection/select_k_best.py
@@ -528,6 +528,22 @@ class SelectKBest(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/feature_selection/select_percentile.py
@@ -527,6 +527,22 @@ class SelectPercentile(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py
@@ -585,6 +585,22 @@ class SequentialFeatureSelector(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/feature_selection/variance_threshold.py
@@ -518,6 +518,22 @@ class VarianceThreshold(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py
@@ -613,6 +613,22 @@ class GaussianProcessClassifier(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py
@@ -604,6 +604,22 @@ class GaussianProcessRegressor(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/impute/iterative_imputer.py
@@ -646,6 +646,22 @@ class IterativeImputer(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/impute/knn_imputer.py
@@ -572,6 +572,22 @@ class KNNImputer(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/impute/missing_indicator.py
@@ -546,6 +546,22 @@ class MissingIndicator(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py
@@ -521,6 +521,22 @@ class AdditiveChi2Sampler(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/kernel_approximation/nystroem.py
@@ -569,6 +569,22 @@ class Nystroem(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py
@@ -545,6 +545,22 @@ class PolynomialCountSketch(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/kernel_approximation/rbf_sampler.py
@@ -532,6 +532,22 @@ class RBFSampler(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py
@@ -530,6 +530,22 @@ class SkewedChi2Sampler(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",