snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. snowflake/ml/_internal/telemetry.py +19 -0
  2. snowflake/ml/model/_client/ops/model_ops.py +16 -38
  3. snowflake/ml/model/_client/sql/model.py +1 -7
  4. snowflake/ml/model/_client/sql/model_version.py +20 -15
  5. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
  6. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
  7. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
  8. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
  9. snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
  10. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
  11. snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
  12. snowflake/ml/model/type_hints.py +3 -0
  13. snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +63 -95
  14. snowflake/ml/modeling/_internal/snowpark_handlers.py +9 -6
  15. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +16 -0
  16. snowflake/ml/modeling/cluster/affinity_propagation.py +16 -0
  17. snowflake/ml/modeling/cluster/agglomerative_clustering.py +16 -0
  18. snowflake/ml/modeling/cluster/birch.py +16 -0
  19. snowflake/ml/modeling/cluster/bisecting_k_means.py +16 -0
  20. snowflake/ml/modeling/cluster/dbscan.py +16 -0
  21. snowflake/ml/modeling/cluster/feature_agglomeration.py +16 -0
  22. snowflake/ml/modeling/cluster/k_means.py +16 -0
  23. snowflake/ml/modeling/cluster/mean_shift.py +16 -0
  24. snowflake/ml/modeling/cluster/mini_batch_k_means.py +16 -0
  25. snowflake/ml/modeling/cluster/optics.py +16 -0
  26. snowflake/ml/modeling/cluster/spectral_biclustering.py +16 -0
  27. snowflake/ml/modeling/cluster/spectral_clustering.py +16 -0
  28. snowflake/ml/modeling/cluster/spectral_coclustering.py +16 -0
  29. snowflake/ml/modeling/compose/column_transformer.py +16 -0
  30. snowflake/ml/modeling/compose/transformed_target_regressor.py +16 -0
  31. snowflake/ml/modeling/covariance/elliptic_envelope.py +16 -0
  32. snowflake/ml/modeling/covariance/empirical_covariance.py +16 -0
  33. snowflake/ml/modeling/covariance/graphical_lasso.py +16 -0
  34. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +16 -0
  35. snowflake/ml/modeling/covariance/ledoit_wolf.py +16 -0
  36. snowflake/ml/modeling/covariance/min_cov_det.py +16 -0
  37. snowflake/ml/modeling/covariance/oas.py +16 -0
  38. snowflake/ml/modeling/covariance/shrunk_covariance.py +16 -0
  39. snowflake/ml/modeling/decomposition/dictionary_learning.py +16 -0
  40. snowflake/ml/modeling/decomposition/factor_analysis.py +16 -0
  41. snowflake/ml/modeling/decomposition/fast_ica.py +16 -0
  42. snowflake/ml/modeling/decomposition/incremental_pca.py +16 -0
  43. snowflake/ml/modeling/decomposition/kernel_pca.py +16 -0
  44. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +16 -0
  45. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +16 -0
  46. snowflake/ml/modeling/decomposition/pca.py +16 -0
  47. snowflake/ml/modeling/decomposition/sparse_pca.py +16 -0
  48. snowflake/ml/modeling/decomposition/truncated_svd.py +16 -0
  49. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +16 -0
  50. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +16 -0
  51. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +16 -0
  52. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +16 -0
  53. snowflake/ml/modeling/ensemble/bagging_classifier.py +16 -0
  54. snowflake/ml/modeling/ensemble/bagging_regressor.py +16 -0
  55. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +16 -0
  56. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +16 -0
  57. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +16 -0
  58. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +16 -0
  59. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +16 -0
  60. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +16 -0
  61. snowflake/ml/modeling/ensemble/isolation_forest.py +16 -0
  62. snowflake/ml/modeling/ensemble/random_forest_classifier.py +16 -0
  63. snowflake/ml/modeling/ensemble/random_forest_regressor.py +16 -0
  64. snowflake/ml/modeling/ensemble/stacking_regressor.py +16 -0
  65. snowflake/ml/modeling/ensemble/voting_classifier.py +16 -0
  66. snowflake/ml/modeling/ensemble/voting_regressor.py +16 -0
  67. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +16 -0
  68. snowflake/ml/modeling/feature_selection/select_fdr.py +16 -0
  69. snowflake/ml/modeling/feature_selection/select_fpr.py +16 -0
  70. snowflake/ml/modeling/feature_selection/select_fwe.py +16 -0
  71. snowflake/ml/modeling/feature_selection/select_k_best.py +16 -0
  72. snowflake/ml/modeling/feature_selection/select_percentile.py +16 -0
  73. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +16 -0
  74. snowflake/ml/modeling/feature_selection/variance_threshold.py +16 -0
  75. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +16 -0
  76. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +16 -0
  77. snowflake/ml/modeling/impute/iterative_imputer.py +16 -0
  78. snowflake/ml/modeling/impute/knn_imputer.py +16 -0
  79. snowflake/ml/modeling/impute/missing_indicator.py +16 -0
  80. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +16 -0
  81. snowflake/ml/modeling/kernel_approximation/nystroem.py +16 -0
  82. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +16 -0
  83. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +16 -0
  84. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +16 -0
  85. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +16 -0
  86. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +16 -0
  87. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +16 -0
  88. snowflake/ml/modeling/linear_model/ard_regression.py +16 -0
  89. snowflake/ml/modeling/linear_model/bayesian_ridge.py +16 -0
  90. snowflake/ml/modeling/linear_model/elastic_net.py +16 -0
  91. snowflake/ml/modeling/linear_model/elastic_net_cv.py +16 -0
  92. snowflake/ml/modeling/linear_model/gamma_regressor.py +16 -0
  93. snowflake/ml/modeling/linear_model/huber_regressor.py +16 -0
  94. snowflake/ml/modeling/linear_model/lars.py +16 -0
  95. snowflake/ml/modeling/linear_model/lars_cv.py +16 -0
  96. snowflake/ml/modeling/linear_model/lasso.py +16 -0
  97. snowflake/ml/modeling/linear_model/lasso_cv.py +16 -0
  98. snowflake/ml/modeling/linear_model/lasso_lars.py +16 -0
  99. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +16 -0
  100. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +16 -0
  101. snowflake/ml/modeling/linear_model/linear_regression.py +16 -0
  102. snowflake/ml/modeling/linear_model/logistic_regression.py +16 -0
  103. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +16 -0
  104. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +16 -0
  105. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +16 -0
  106. snowflake/ml/modeling/linear_model/multi_task_lasso.py +16 -0
  107. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +16 -0
  108. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +16 -0
  109. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +16 -0
  110. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +16 -0
  111. snowflake/ml/modeling/linear_model/perceptron.py +16 -0
  112. snowflake/ml/modeling/linear_model/poisson_regressor.py +16 -0
  113. snowflake/ml/modeling/linear_model/ransac_regressor.py +16 -0
  114. snowflake/ml/modeling/linear_model/ridge.py +16 -0
  115. snowflake/ml/modeling/linear_model/ridge_classifier.py +16 -0
  116. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +16 -0
  117. snowflake/ml/modeling/linear_model/ridge_cv.py +16 -0
  118. snowflake/ml/modeling/linear_model/sgd_classifier.py +16 -0
  119. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +16 -0
  120. snowflake/ml/modeling/linear_model/sgd_regressor.py +16 -0
  121. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +16 -0
  122. snowflake/ml/modeling/linear_model/tweedie_regressor.py +16 -0
  123. snowflake/ml/modeling/manifold/isomap.py +16 -0
  124. snowflake/ml/modeling/manifold/mds.py +16 -0
  125. snowflake/ml/modeling/manifold/spectral_embedding.py +16 -0
  126. snowflake/ml/modeling/manifold/tsne.py +16 -0
  127. snowflake/ml/modeling/metrics/classification.py +5 -6
  128. snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
  129. snowflake/ml/modeling/metrics/ranking.py +7 -3
  130. snowflake/ml/modeling/metrics/regression.py +6 -3
  131. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +16 -0
  132. snowflake/ml/modeling/mixture/gaussian_mixture.py +16 -0
  133. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +16 -0
  134. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +16 -0
  135. snowflake/ml/modeling/multiclass/output_code_classifier.py +16 -0
  136. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +16 -0
  137. snowflake/ml/modeling/naive_bayes/categorical_nb.py +16 -0
  138. snowflake/ml/modeling/naive_bayes/complement_nb.py +16 -0
  139. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +16 -0
  140. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +16 -0
  141. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +16 -0
  142. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +16 -0
  143. snowflake/ml/modeling/neighbors/kernel_density.py +16 -0
  144. snowflake/ml/modeling/neighbors/local_outlier_factor.py +16 -0
  145. snowflake/ml/modeling/neighbors/nearest_centroid.py +16 -0
  146. snowflake/ml/modeling/neighbors/nearest_neighbors.py +16 -0
  147. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +16 -0
  148. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +16 -0
  149. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +16 -0
  150. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +16 -0
  151. snowflake/ml/modeling/neural_network/mlp_classifier.py +16 -0
  152. snowflake/ml/modeling/neural_network/mlp_regressor.py +16 -0
  153. snowflake/ml/modeling/preprocessing/polynomial_features.py +16 -0
  154. snowflake/ml/modeling/semi_supervised/label_propagation.py +16 -0
  155. snowflake/ml/modeling/semi_supervised/label_spreading.py +16 -0
  156. snowflake/ml/modeling/svm/linear_svc.py +16 -0
  157. snowflake/ml/modeling/svm/linear_svr.py +16 -0
  158. snowflake/ml/modeling/svm/nu_svc.py +16 -0
  159. snowflake/ml/modeling/svm/nu_svr.py +16 -0
  160. snowflake/ml/modeling/svm/svc.py +16 -0
  161. snowflake/ml/modeling/svm/svr.py +16 -0
  162. snowflake/ml/modeling/tree/decision_tree_classifier.py +16 -0
  163. snowflake/ml/modeling/tree/decision_tree_regressor.py +16 -0
  164. snowflake/ml/modeling/tree/extra_tree_classifier.py +16 -0
  165. snowflake/ml/modeling/tree/extra_tree_regressor.py +16 -0
  166. snowflake/ml/modeling/xgboost/xgb_classifier.py +16 -0
  167. snowflake/ml/modeling/xgboost/xgb_regressor.py +16 -0
  168. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +16 -0
  169. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +16 -0
  170. snowflake/ml/registry/registry.py +2 -0
  171. snowflake/ml/version.py +1 -1
  172. snowflake_ml_python-1.2.1.dist-info/LICENSE.txt +202 -0
  173. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/METADATA +261 -50
  174. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/RECORD +189 -186
  175. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/WHEEL +2 -1
  176. snowflake_ml_python-1.2.1.dist-info/top_level.txt +1 -0
@@ -306,7 +306,7 @@ class SnowparkHandlers:
             input_cols: List[str],
             label_cols: List[str],
             sample_weight_col: Optional[str],
-            statement_params: Dict[str, str],
+            score_statement_params: Dict[str, str],
         ) -> float:
             import inspect
             import os
@@ -317,13 +317,13 @@ class SnowparkHandlers:
             importlib.import_module(import_name)

             for query in sql_queries[:-1]:
-                _ = session.sql(query).collect(statement_params=statement_params)
+                _ = session.sql(query).collect(statement_params=score_statement_params)
             sp_df = session.sql(sql_queries[-1])
-            df: pd.DataFrame = sp_df.to_pandas(statement_params=statement_params)
+            df: pd.DataFrame = sp_df.to_pandas(statement_params=score_statement_params)
             df.columns = sp_df.columns

             local_score_file_name = get_temp_file_path()
-            session.file.get(stage_score_file_name, local_score_file_name, statement_params=statement_params)
+            session.file.get(stage_score_file_name, local_score_file_name, statement_params=score_statement_params)

             local_score_file_name_path = os.path.join(local_score_file_name, os.listdir(local_score_file_name)[0])
             with open(local_score_file_name_path, mode="r+b") as local_score_file_obj:
@@ -348,7 +348,7 @@ class SnowparkHandlers:
             return result

         # Call score sproc
-        statement_params = telemetry.get_function_usage_statement_params(
+        score_statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=self._subproject,
             function_name=telemetry.get_statement_params_full_func_name(
@@ -357,6 +357,8 @@ class SnowparkHandlers:
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
         )
+
+        kwargs = telemetry.get_sproc_statement_params_kwargs(score_wrapper_sproc, score_statement_params)
         score: float = score_wrapper_sproc(
             session,
             queries,
@@ -364,7 +366,8 @@
             input_cols,
             label_cols,
             sample_weight_col,
-            statement_params,
+            score_statement_params,
+            **kwargs,
         )

         cleanup_temp_files([local_score_file_name])
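
Why the rename in the snowpark_handlers.py hunks above: the wrapper sproc already receives the telemetry dict as a regular argument, and the Snowpark stored-procedure call itself can (in recent Snowpark versions) also accept a statement_params keyword used to tag the CALL query. Keeping both under the single name statement_params made the two roles easy to conflate. The new telemetry.get_sproc_statement_params_kwargs helper (part of the +19 lines in snowflake/ml/_internal/telemetry.py) builds that keyword dict only when the sproc call can accept it. Its real body is not shown in this diff; the following is a hypothetical Python sketch of the pattern, with score_wrapper_sproc reduced to a stub:

import inspect
from typing import Any, Callable, Dict

# Hypothetical sketch of the new helper; the actual implementation in
# snowflake/ml/_internal/telemetry.py is not shown in this diff.
def get_sproc_statement_params_kwargs(sproc: Callable[..., Any], statement_params: Dict[str, str]) -> Dict[str, Any]:
    # Forward the telemetry dict as a keyword only if the callable's
    # signature declares `statement_params`; older sproc wrappers may not.
    if "statement_params" in inspect.signature(sproc).parameters:
        return {"statement_params": statement_params}
    return {}

# Stub standing in for the registered scoring sproc.
def score_wrapper_sproc(session: Any, score_statement_params: Dict[str, str], **kwargs: Any) -> float:
    return 1.0

score_statement_params = {"QUERY_TAG": "score"}
kwargs = get_sproc_statement_params_kwargs(score_wrapper_sproc, score_statement_params)
score = score_wrapper_sproc(None, score_statement_params, **kwargs)  # mirrors the call in the diff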
@@ -589,6 +589,22 @@ class CalibratedClassifierCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
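
The identical 16-line fallback above is stamped into every autogenerated wrapper listed in the file table (the +16 -0 entries); only the class name changes from hunk to hunk below. Distilled into a standalone function, with hypothetical names and plain strings standing in for the Snowpark/Snowflake type objects that _infer_signature and convert_sp_to_sf_type actually handle, the decision logic is:

from typing import List

def infer_expected_dtype(estimator: object, output_cols: List[str], input_types: List[str]) -> str:
    # Sketch of the fallback that runs when the factory assigned no type ("").
    # Clustering transformers: if the output column count differs from
    # n_clusters, each row packs several values, so the column is an ARRAY.
    if hasattr(estimator, "n_clusters") and getattr(estimator, "n_clusters") != len(output_cols):
        return "ARRAY"
    # Decomposition transformers: same reasoning with n_components.
    if hasattr(estimator, "n_components") and getattr(estimator, "n_components") != len(output_cols):
        return "ARRAY"
    # Reuse the input type only when all inputs share one type and the column
    # counts match; otherwise return "" so the caller keeps its fallback.
    if input_types and all(t == input_types[0] for t in input_types) and len(input_types) == len(output_cols):
        return input_types[0]
    return ""

class FakeKMeans:  # stand-in for a fitted clustering estimator
    n_clusters = 8

print(infer_expected_dtype(FakeKMeans(), ["CLUSTER"], ["DOUBLE"]))       # -> ARRAY
print(infer_expected_dtype(object(), ["A", "B"], ["DOUBLE", "DOUBLE"]))  # -> DOUBLE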
@@ -564,6 +564,22 @@ class AffinityPropagation(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -595,6 +595,22 @@ class AgglomerativeClustering(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -557,6 +557,22 @@ class Birch(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -606,6 +606,22 @@ class BisectingKMeans(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -570,6 +570,22 @@ class DBSCAN(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -604,6 +604,22 @@ class FeatureAgglomeration(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -601,6 +601,22 @@ class KMeans(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -575,6 +575,22 @@ class MeanShift(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -627,6 +627,22 @@ class MiniBatchKMeans(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -643,6 +643,22 @@ class OPTICS(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -581,6 +581,22 @@ class SpectralBiclustering(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -639,6 +639,22 @@ class SpectralClustering(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -560,6 +560,22 @@ class SpectralCoclustering(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -592,6 +592,22 @@ class ColumnTransformer(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -553,6 +553,22 @@ class TransformedTargetRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -548,6 +548,22 @@ class EllipticEnvelope(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -522,6 +522,22 @@ class EmpiricalCovariance(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -570,6 +570,22 @@ class GraphicalLasso(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -596,6 +596,22 @@ class GraphicalLassoCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -529,6 +529,22 @@ class LedoitWolf(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -541,6 +541,22 @@ class MinCovDet(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",