snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- snowflake/ml/_internal/telemetry.py +19 -0
- snowflake/ml/model/_client/ops/model_ops.py +16 -38
- snowflake/ml/model/_client/sql/model.py +1 -7
- snowflake/ml/model/_client/sql/model_version.py +20 -15
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
- snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
- snowflake/ml/model/type_hints.py +3 -0
- snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +63 -95
- snowflake/ml/modeling/_internal/snowpark_handlers.py +9 -6
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +16 -0
- snowflake/ml/modeling/cluster/affinity_propagation.py +16 -0
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +16 -0
- snowflake/ml/modeling/cluster/birch.py +16 -0
- snowflake/ml/modeling/cluster/bisecting_k_means.py +16 -0
- snowflake/ml/modeling/cluster/dbscan.py +16 -0
- snowflake/ml/modeling/cluster/feature_agglomeration.py +16 -0
- snowflake/ml/modeling/cluster/k_means.py +16 -0
- snowflake/ml/modeling/cluster/mean_shift.py +16 -0
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +16 -0
- snowflake/ml/modeling/cluster/optics.py +16 -0
- snowflake/ml/modeling/cluster/spectral_biclustering.py +16 -0
- snowflake/ml/modeling/cluster/spectral_clustering.py +16 -0
- snowflake/ml/modeling/cluster/spectral_coclustering.py +16 -0
- snowflake/ml/modeling/compose/column_transformer.py +16 -0
- snowflake/ml/modeling/compose/transformed_target_regressor.py +16 -0
- snowflake/ml/modeling/covariance/elliptic_envelope.py +16 -0
- snowflake/ml/modeling/covariance/empirical_covariance.py +16 -0
- snowflake/ml/modeling/covariance/graphical_lasso.py +16 -0
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +16 -0
- snowflake/ml/modeling/covariance/ledoit_wolf.py +16 -0
- snowflake/ml/modeling/covariance/min_cov_det.py +16 -0
- snowflake/ml/modeling/covariance/oas.py +16 -0
- snowflake/ml/modeling/covariance/shrunk_covariance.py +16 -0
- snowflake/ml/modeling/decomposition/dictionary_learning.py +16 -0
- snowflake/ml/modeling/decomposition/factor_analysis.py +16 -0
- snowflake/ml/modeling/decomposition/fast_ica.py +16 -0
- snowflake/ml/modeling/decomposition/incremental_pca.py +16 -0
- snowflake/ml/modeling/decomposition/kernel_pca.py +16 -0
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +16 -0
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +16 -0
- snowflake/ml/modeling/decomposition/pca.py +16 -0
- snowflake/ml/modeling/decomposition/sparse_pca.py +16 -0
- snowflake/ml/modeling/decomposition/truncated_svd.py +16 -0
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +16 -0
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +16 -0
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/bagging_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/bagging_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/isolation_forest.py +16 -0
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/stacking_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/voting_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/voting_regressor.py +16 -0
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +16 -0
- snowflake/ml/modeling/feature_selection/select_fdr.py +16 -0
- snowflake/ml/modeling/feature_selection/select_fpr.py +16 -0
- snowflake/ml/modeling/feature_selection/select_fwe.py +16 -0
- snowflake/ml/modeling/feature_selection/select_k_best.py +16 -0
- snowflake/ml/modeling/feature_selection/select_percentile.py +16 -0
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +16 -0
- snowflake/ml/modeling/feature_selection/variance_threshold.py +16 -0
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +16 -0
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +16 -0
- snowflake/ml/modeling/impute/iterative_imputer.py +16 -0
- snowflake/ml/modeling/impute/knn_imputer.py +16 -0
- snowflake/ml/modeling/impute/missing_indicator.py +16 -0
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +16 -0
- snowflake/ml/modeling/kernel_approximation/nystroem.py +16 -0
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +16 -0
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +16 -0
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +16 -0
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +16 -0
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +16 -0
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/ard_regression.py +16 -0
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +16 -0
- snowflake/ml/modeling/linear_model/elastic_net.py +16 -0
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +16 -0
- snowflake/ml/modeling/linear_model/gamma_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/huber_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/lars.py +16 -0
- snowflake/ml/modeling/linear_model/lars_cv.py +16 -0
- snowflake/ml/modeling/linear_model/lasso.py +16 -0
- snowflake/ml/modeling/linear_model/lasso_cv.py +16 -0
- snowflake/ml/modeling/linear_model/lasso_lars.py +16 -0
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +16 -0
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +16 -0
- snowflake/ml/modeling/linear_model/linear_regression.py +16 -0
- snowflake/ml/modeling/linear_model/logistic_regression.py +16 -0
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +16 -0
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +16 -0
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +16 -0
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +16 -0
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +16 -0
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +16 -0
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +16 -0
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/perceptron.py +16 -0
- snowflake/ml/modeling/linear_model/poisson_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/ransac_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/ridge.py +16 -0
- snowflake/ml/modeling/linear_model/ridge_classifier.py +16 -0
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +16 -0
- snowflake/ml/modeling/linear_model/ridge_cv.py +16 -0
- snowflake/ml/modeling/linear_model/sgd_classifier.py +16 -0
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +16 -0
- snowflake/ml/modeling/linear_model/sgd_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +16 -0
- snowflake/ml/modeling/manifold/isomap.py +16 -0
- snowflake/ml/modeling/manifold/mds.py +16 -0
- snowflake/ml/modeling/manifold/spectral_embedding.py +16 -0
- snowflake/ml/modeling/manifold/tsne.py +16 -0
- snowflake/ml/modeling/metrics/classification.py +5 -6
- snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
- snowflake/ml/modeling/metrics/ranking.py +7 -3
- snowflake/ml/modeling/metrics/regression.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +16 -0
- snowflake/ml/modeling/mixture/gaussian_mixture.py +16 -0
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +16 -0
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +16 -0
- snowflake/ml/modeling/multiclass/output_code_classifier.py +16 -0
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +16 -0
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +16 -0
- snowflake/ml/modeling/naive_bayes/complement_nb.py +16 -0
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +16 -0
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +16 -0
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +16 -0
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +16 -0
- snowflake/ml/modeling/neighbors/kernel_density.py +16 -0
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +16 -0
- snowflake/ml/modeling/neighbors/nearest_centroid.py +16 -0
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +16 -0
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +16 -0
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +16 -0
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +16 -0
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +16 -0
- snowflake/ml/modeling/neural_network/mlp_classifier.py +16 -0
- snowflake/ml/modeling/neural_network/mlp_regressor.py +16 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +16 -0
- snowflake/ml/modeling/semi_supervised/label_propagation.py +16 -0
- snowflake/ml/modeling/semi_supervised/label_spreading.py +16 -0
- snowflake/ml/modeling/svm/linear_svc.py +16 -0
- snowflake/ml/modeling/svm/linear_svr.py +16 -0
- snowflake/ml/modeling/svm/nu_svc.py +16 -0
- snowflake/ml/modeling/svm/nu_svr.py +16 -0
- snowflake/ml/modeling/svm/svc.py +16 -0
- snowflake/ml/modeling/svm/svr.py +16 -0
- snowflake/ml/modeling/tree/decision_tree_classifier.py +16 -0
- snowflake/ml/modeling/tree/decision_tree_regressor.py +16 -0
- snowflake/ml/modeling/tree/extra_tree_classifier.py +16 -0
- snowflake/ml/modeling/tree/extra_tree_regressor.py +16 -0
- snowflake/ml/modeling/xgboost/xgb_classifier.py +16 -0
- snowflake/ml/modeling/xgboost/xgb_regressor.py +16 -0
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +16 -0
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +16 -0
- snowflake/ml/registry/registry.py +2 -0
- snowflake/ml/version.py +1 -1
- snowflake_ml_python-1.2.1.dist-info/LICENSE.txt +202 -0
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/METADATA +261 -50
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/RECORD +189 -186
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/WHEEL +2 -1
- snowflake_ml_python-1.2.1.dist-info/top_level.txt +1 -0
@@ -585,6 +585,22 @@ class IsolationForest(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
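For reference, a minimal standalone sketch of the dtype-fallback logic this change introduces; the estimator object, column lists, and type names below are hypothetical stand-ins, not snowflake-ml-python's actual API.

# Sketch of the fallback decision above, under assumed inputs.
from typing import List

def infer_expected_dtype(estimator: object, output_cols: List[str], input_types: List[str]) -> str:
    expected_dtype = ""
    # A cluster-count mismatch means each row will hold a list of values, i.e. ARRAY.
    if hasattr(estimator, "n_clusters") and getattr(estimator, "n_clusters") != len(output_cols):
        expected_dtype = "ARRAY"
    # Same reasoning for decomposition transformers and their component count.
    elif hasattr(estimator, "n_components") and getattr(estimator, "n_components") != len(output_cols):
        expected_dtype = "ARRAY"
    # Only a homogeneous, 1:1 input-to-output mapping lets us pick one concrete type.
    elif input_types and all(t == input_types[0] for t in input_types) and len(input_types) == len(output_cols):
        expected_dtype = input_types[0]
    return expected_dtype

print(infer_expected_dtype(object(), ["OUT1", "OUT2"], ["DOUBLE", "DOUBLE"]))  # -> DOUBLE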
@@ -697,6 +697,22 @@ class RandomForestClassifier(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -676,6 +676,22 @@ class RandomForestRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -579,6 +579,22 @@ class StackingRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -561,6 +561,22 @@ class VotingClassifier(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -543,6 +543,22 @@ class VotingRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -531,6 +531,22 @@ class GenericUnivariateSelect(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -527,6 +527,22 @@ class SelectFdr(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -527,6 +527,22 @@ class SelectFpr(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -527,6 +527,22 @@ class SelectFwe(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -528,6 +528,22 @@ class SelectKBest(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -527,6 +527,22 @@ class SelectPercentile(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -585,6 +585,22 @@ class SequentialFeatureSelector(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -518,6 +518,22 @@ class VarianceThreshold(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -613,6 +613,22 @@ class GaussianProcessClassifier(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -604,6 +604,22 @@ class GaussianProcessRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -646,6 +646,22 @@ class IterativeImputer(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -572,6 +572,22 @@ class KNNImputer(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -546,6 +546,22 @@ class MissingIndicator(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -521,6 +521,22 @@ class AdditiveChi2Sampler(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -569,6 +569,22 @@ class Nystroem(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -545,6 +545,22 @@ class PolynomialCountSketch(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -532,6 +532,22 @@ class RBFSampler(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -530,6 +530,22 @@ class SkewedChi2Sampler(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",