snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (176)
  1. snowflake/ml/_internal/telemetry.py +19 -0
  2. snowflake/ml/model/_client/ops/model_ops.py +16 -38
  3. snowflake/ml/model/_client/sql/model.py +1 -7
  4. snowflake/ml/model/_client/sql/model_version.py +20 -15
  5. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
  6. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
  7. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
  8. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
  9. snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
  10. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
  11. snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
  12. snowflake/ml/model/type_hints.py +3 -0
  13. snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +63 -95
  14. snowflake/ml/modeling/_internal/snowpark_handlers.py +9 -6
  15. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +16 -0
  16. snowflake/ml/modeling/cluster/affinity_propagation.py +16 -0
  17. snowflake/ml/modeling/cluster/agglomerative_clustering.py +16 -0
  18. snowflake/ml/modeling/cluster/birch.py +16 -0
  19. snowflake/ml/modeling/cluster/bisecting_k_means.py +16 -0
  20. snowflake/ml/modeling/cluster/dbscan.py +16 -0
  21. snowflake/ml/modeling/cluster/feature_agglomeration.py +16 -0
  22. snowflake/ml/modeling/cluster/k_means.py +16 -0
  23. snowflake/ml/modeling/cluster/mean_shift.py +16 -0
  24. snowflake/ml/modeling/cluster/mini_batch_k_means.py +16 -0
  25. snowflake/ml/modeling/cluster/optics.py +16 -0
  26. snowflake/ml/modeling/cluster/spectral_biclustering.py +16 -0
  27. snowflake/ml/modeling/cluster/spectral_clustering.py +16 -0
  28. snowflake/ml/modeling/cluster/spectral_coclustering.py +16 -0
  29. snowflake/ml/modeling/compose/column_transformer.py +16 -0
  30. snowflake/ml/modeling/compose/transformed_target_regressor.py +16 -0
  31. snowflake/ml/modeling/covariance/elliptic_envelope.py +16 -0
  32. snowflake/ml/modeling/covariance/empirical_covariance.py +16 -0
  33. snowflake/ml/modeling/covariance/graphical_lasso.py +16 -0
  34. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +16 -0
  35. snowflake/ml/modeling/covariance/ledoit_wolf.py +16 -0
  36. snowflake/ml/modeling/covariance/min_cov_det.py +16 -0
  37. snowflake/ml/modeling/covariance/oas.py +16 -0
  38. snowflake/ml/modeling/covariance/shrunk_covariance.py +16 -0
  39. snowflake/ml/modeling/decomposition/dictionary_learning.py +16 -0
  40. snowflake/ml/modeling/decomposition/factor_analysis.py +16 -0
  41. snowflake/ml/modeling/decomposition/fast_ica.py +16 -0
  42. snowflake/ml/modeling/decomposition/incremental_pca.py +16 -0
  43. snowflake/ml/modeling/decomposition/kernel_pca.py +16 -0
  44. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +16 -0
  45. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +16 -0
  46. snowflake/ml/modeling/decomposition/pca.py +16 -0
  47. snowflake/ml/modeling/decomposition/sparse_pca.py +16 -0
  48. snowflake/ml/modeling/decomposition/truncated_svd.py +16 -0
  49. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +16 -0
  50. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +16 -0
  51. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +16 -0
  52. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +16 -0
  53. snowflake/ml/modeling/ensemble/bagging_classifier.py +16 -0
  54. snowflake/ml/modeling/ensemble/bagging_regressor.py +16 -0
  55. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +16 -0
  56. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +16 -0
  57. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +16 -0
  58. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +16 -0
  59. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +16 -0
  60. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +16 -0
  61. snowflake/ml/modeling/ensemble/isolation_forest.py +16 -0
  62. snowflake/ml/modeling/ensemble/random_forest_classifier.py +16 -0
  63. snowflake/ml/modeling/ensemble/random_forest_regressor.py +16 -0
  64. snowflake/ml/modeling/ensemble/stacking_regressor.py +16 -0
  65. snowflake/ml/modeling/ensemble/voting_classifier.py +16 -0
  66. snowflake/ml/modeling/ensemble/voting_regressor.py +16 -0
  67. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +16 -0
  68. snowflake/ml/modeling/feature_selection/select_fdr.py +16 -0
  69. snowflake/ml/modeling/feature_selection/select_fpr.py +16 -0
  70. snowflake/ml/modeling/feature_selection/select_fwe.py +16 -0
  71. snowflake/ml/modeling/feature_selection/select_k_best.py +16 -0
  72. snowflake/ml/modeling/feature_selection/select_percentile.py +16 -0
  73. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +16 -0
  74. snowflake/ml/modeling/feature_selection/variance_threshold.py +16 -0
  75. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +16 -0
  76. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +16 -0
  77. snowflake/ml/modeling/impute/iterative_imputer.py +16 -0
  78. snowflake/ml/modeling/impute/knn_imputer.py +16 -0
  79. snowflake/ml/modeling/impute/missing_indicator.py +16 -0
  80. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +16 -0
  81. snowflake/ml/modeling/kernel_approximation/nystroem.py +16 -0
  82. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +16 -0
  83. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +16 -0
  84. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +16 -0
  85. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +16 -0
  86. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +16 -0
  87. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +16 -0
  88. snowflake/ml/modeling/linear_model/ard_regression.py +16 -0
  89. snowflake/ml/modeling/linear_model/bayesian_ridge.py +16 -0
  90. snowflake/ml/modeling/linear_model/elastic_net.py +16 -0
  91. snowflake/ml/modeling/linear_model/elastic_net_cv.py +16 -0
  92. snowflake/ml/modeling/linear_model/gamma_regressor.py +16 -0
  93. snowflake/ml/modeling/linear_model/huber_regressor.py +16 -0
  94. snowflake/ml/modeling/linear_model/lars.py +16 -0
  95. snowflake/ml/modeling/linear_model/lars_cv.py +16 -0
  96. snowflake/ml/modeling/linear_model/lasso.py +16 -0
  97. snowflake/ml/modeling/linear_model/lasso_cv.py +16 -0
  98. snowflake/ml/modeling/linear_model/lasso_lars.py +16 -0
  99. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +16 -0
  100. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +16 -0
  101. snowflake/ml/modeling/linear_model/linear_regression.py +16 -0
  102. snowflake/ml/modeling/linear_model/logistic_regression.py +16 -0
  103. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +16 -0
  104. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +16 -0
  105. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +16 -0
  106. snowflake/ml/modeling/linear_model/multi_task_lasso.py +16 -0
  107. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +16 -0
  108. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +16 -0
  109. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +16 -0
  110. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +16 -0
  111. snowflake/ml/modeling/linear_model/perceptron.py +16 -0
  112. snowflake/ml/modeling/linear_model/poisson_regressor.py +16 -0
  113. snowflake/ml/modeling/linear_model/ransac_regressor.py +16 -0
  114. snowflake/ml/modeling/linear_model/ridge.py +16 -0
  115. snowflake/ml/modeling/linear_model/ridge_classifier.py +16 -0
  116. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +16 -0
  117. snowflake/ml/modeling/linear_model/ridge_cv.py +16 -0
  118. snowflake/ml/modeling/linear_model/sgd_classifier.py +16 -0
  119. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +16 -0
  120. snowflake/ml/modeling/linear_model/sgd_regressor.py +16 -0
  121. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +16 -0
  122. snowflake/ml/modeling/linear_model/tweedie_regressor.py +16 -0
  123. snowflake/ml/modeling/manifold/isomap.py +16 -0
  124. snowflake/ml/modeling/manifold/mds.py +16 -0
  125. snowflake/ml/modeling/manifold/spectral_embedding.py +16 -0
  126. snowflake/ml/modeling/manifold/tsne.py +16 -0
  127. snowflake/ml/modeling/metrics/classification.py +5 -6
  128. snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
  129. snowflake/ml/modeling/metrics/ranking.py +7 -3
  130. snowflake/ml/modeling/metrics/regression.py +6 -3
  131. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +16 -0
  132. snowflake/ml/modeling/mixture/gaussian_mixture.py +16 -0
  133. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +16 -0
  134. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +16 -0
  135. snowflake/ml/modeling/multiclass/output_code_classifier.py +16 -0
  136. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +16 -0
  137. snowflake/ml/modeling/naive_bayes/categorical_nb.py +16 -0
  138. snowflake/ml/modeling/naive_bayes/complement_nb.py +16 -0
  139. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +16 -0
  140. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +16 -0
  141. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +16 -0
  142. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +16 -0
  143. snowflake/ml/modeling/neighbors/kernel_density.py +16 -0
  144. snowflake/ml/modeling/neighbors/local_outlier_factor.py +16 -0
  145. snowflake/ml/modeling/neighbors/nearest_centroid.py +16 -0
  146. snowflake/ml/modeling/neighbors/nearest_neighbors.py +16 -0
  147. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +16 -0
  148. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +16 -0
  149. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +16 -0
  150. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +16 -0
  151. snowflake/ml/modeling/neural_network/mlp_classifier.py +16 -0
  152. snowflake/ml/modeling/neural_network/mlp_regressor.py +16 -0
  153. snowflake/ml/modeling/preprocessing/polynomial_features.py +16 -0
  154. snowflake/ml/modeling/semi_supervised/label_propagation.py +16 -0
  155. snowflake/ml/modeling/semi_supervised/label_spreading.py +16 -0
  156. snowflake/ml/modeling/svm/linear_svc.py +16 -0
  157. snowflake/ml/modeling/svm/linear_svr.py +16 -0
  158. snowflake/ml/modeling/svm/nu_svc.py +16 -0
  159. snowflake/ml/modeling/svm/nu_svr.py +16 -0
  160. snowflake/ml/modeling/svm/svc.py +16 -0
  161. snowflake/ml/modeling/svm/svr.py +16 -0
  162. snowflake/ml/modeling/tree/decision_tree_classifier.py +16 -0
  163. snowflake/ml/modeling/tree/decision_tree_regressor.py +16 -0
  164. snowflake/ml/modeling/tree/extra_tree_classifier.py +16 -0
  165. snowflake/ml/modeling/tree/extra_tree_regressor.py +16 -0
  166. snowflake/ml/modeling/xgboost/xgb_classifier.py +16 -0
  167. snowflake/ml/modeling/xgboost/xgb_regressor.py +16 -0
  168. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +16 -0
  169. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +16 -0
  170. snowflake/ml/registry/registry.py +2 -0
  171. snowflake/ml/version.py +1 -1
  172. snowflake_ml_python-1.2.1.dist-info/LICENSE.txt +202 -0
  173. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/METADATA +261 -50
  174. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/RECORD +189 -186
  175. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/WHEEL +2 -1
  176. snowflake_ml_python-1.2.1.dist-info/top_level.txt +1 -0
@@ -522,6 +522,22 @@ class OAS(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
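
This same 16-line fallback is repeated in every generated transformer below; only the surrounding class differs. As a rough illustration of the branching (a hypothetical standalone sketch: infer_expected_dtype and _FakeDecomposition are invented names, not part of the snowflake-ml-python API):

# Hypothetical sketch of the dtype-inference fallback added in 1.2.1.
class _FakeDecomposition:
    # Stand-in for a fitted sklearn estimator exposing `n_components`.
    def __init__(self, n_components):
        self.n_components = n_components

def infer_expected_dtype(sklearn_object, output_cols, input_types):
    # Mirrors the branching above: a width mismatch between the estimator's
    # clusters/components and the declared output columns forces "ARRAY";
    # otherwise, uniform input types of matching width are reused directly.
    if hasattr(sklearn_object, "n_clusters") and sklearn_object.n_clusters != len(output_cols):
        return "ARRAY"
    if hasattr(sklearn_object, "n_components") and sklearn_object.n_components != len(output_cols):
        return "ARRAY"
    if input_types and all(t == input_types[0] for t in input_types) and len(input_types) == len(output_cols):
        return input_types[0]
    return "ARRAY"  # fall back to a list-per-row (variant) output

print(infer_expected_dtype(_FakeDecomposition(2), ["PC1", "PC2"], ["FLOAT", "FLOAT"]))  # -> FLOAT
print(infer_expected_dtype(_FakeDecomposition(3), ["OUT1"], ["FLOAT", "FLOAT"]))        # -> ARRAY

The net effect is that a transform whose output width matches uniform scalar inputs can keep a scalar Snowflake type instead of always falling back to a variant-style ARRAY.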
@@ -528,6 +528,22 @@ class ShrunkCovariance(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -636,6 +636,22 @@ class DictionaryLearning(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -573,6 +573,22 @@ class FactorAnalysis(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -591,6 +591,22 @@ class FastICA(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -543,6 +543,22 @@ class IncrementalPCA(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -639,6 +639,22 @@ class KernelPCA(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -661,6 +661,22 @@ class MiniBatchDictionaryLearning(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -606,6 +606,22 @@ class MiniBatchSparsePCA(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -608,6 +608,22 @@ class PCA(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -581,6 +581,22 @@ class SparsePCA(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -562,6 +562,22 @@ class TruncatedSVD(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -581,6 +581,22 @@ class LinearDiscriminantAnalysis(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -541,6 +541,22 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -566,6 +566,22 @@ class AdaBoostClassifier(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -563,6 +563,22 @@ class AdaBoostRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -598,6 +598,22 @@ class BaggingClassifier(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -598,6 +598,22 @@ class BaggingRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -701,6 +701,22 @@ class ExtraTreesClassifier(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -680,6 +680,22 @@ class ExtraTreesRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -713,6 +713,22 @@ class GradientBoostingClassifier(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -722,6 +722,22 @@ class GradientBoostingRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -694,6 +694,22 @@ class HistGradientBoostingClassifier(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -685,6 +685,22 @@ class HistGradientBoostingRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",