snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry; it is provided for informational purposes only.
- snowflake/ml/_internal/telemetry.py +19 -0
- snowflake/ml/model/_client/ops/model_ops.py +16 -38
- snowflake/ml/model/_client/sql/model.py +1 -7
- snowflake/ml/model/_client/sql/model_version.py +20 -15
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
- snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
- snowflake/ml/model/type_hints.py +3 -0
- snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +63 -95
- snowflake/ml/modeling/_internal/snowpark_handlers.py +9 -6
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +16 -0
- snowflake/ml/modeling/cluster/affinity_propagation.py +16 -0
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +16 -0
- snowflake/ml/modeling/cluster/birch.py +16 -0
- snowflake/ml/modeling/cluster/bisecting_k_means.py +16 -0
- snowflake/ml/modeling/cluster/dbscan.py +16 -0
- snowflake/ml/modeling/cluster/feature_agglomeration.py +16 -0
- snowflake/ml/modeling/cluster/k_means.py +16 -0
- snowflake/ml/modeling/cluster/mean_shift.py +16 -0
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +16 -0
- snowflake/ml/modeling/cluster/optics.py +16 -0
- snowflake/ml/modeling/cluster/spectral_biclustering.py +16 -0
- snowflake/ml/modeling/cluster/spectral_clustering.py +16 -0
- snowflake/ml/modeling/cluster/spectral_coclustering.py +16 -0
- snowflake/ml/modeling/compose/column_transformer.py +16 -0
- snowflake/ml/modeling/compose/transformed_target_regressor.py +16 -0
- snowflake/ml/modeling/covariance/elliptic_envelope.py +16 -0
- snowflake/ml/modeling/covariance/empirical_covariance.py +16 -0
- snowflake/ml/modeling/covariance/graphical_lasso.py +16 -0
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +16 -0
- snowflake/ml/modeling/covariance/ledoit_wolf.py +16 -0
- snowflake/ml/modeling/covariance/min_cov_det.py +16 -0
- snowflake/ml/modeling/covariance/oas.py +16 -0
- snowflake/ml/modeling/covariance/shrunk_covariance.py +16 -0
- snowflake/ml/modeling/decomposition/dictionary_learning.py +16 -0
- snowflake/ml/modeling/decomposition/factor_analysis.py +16 -0
- snowflake/ml/modeling/decomposition/fast_ica.py +16 -0
- snowflake/ml/modeling/decomposition/incremental_pca.py +16 -0
- snowflake/ml/modeling/decomposition/kernel_pca.py +16 -0
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +16 -0
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +16 -0
- snowflake/ml/modeling/decomposition/pca.py +16 -0
- snowflake/ml/modeling/decomposition/sparse_pca.py +16 -0
- snowflake/ml/modeling/decomposition/truncated_svd.py +16 -0
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +16 -0
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +16 -0
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/bagging_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/bagging_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/isolation_forest.py +16 -0
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/stacking_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/voting_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/voting_regressor.py +16 -0
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +16 -0
- snowflake/ml/modeling/feature_selection/select_fdr.py +16 -0
- snowflake/ml/modeling/feature_selection/select_fpr.py +16 -0
- snowflake/ml/modeling/feature_selection/select_fwe.py +16 -0
- snowflake/ml/modeling/feature_selection/select_k_best.py +16 -0
- snowflake/ml/modeling/feature_selection/select_percentile.py +16 -0
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +16 -0
- snowflake/ml/modeling/feature_selection/variance_threshold.py +16 -0
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +16 -0
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +16 -0
- snowflake/ml/modeling/impute/iterative_imputer.py +16 -0
- snowflake/ml/modeling/impute/knn_imputer.py +16 -0
- snowflake/ml/modeling/impute/missing_indicator.py +16 -0
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +16 -0
- snowflake/ml/modeling/kernel_approximation/nystroem.py +16 -0
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +16 -0
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +16 -0
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +16 -0
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +16 -0
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +16 -0
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/ard_regression.py +16 -0
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +16 -0
- snowflake/ml/modeling/linear_model/elastic_net.py +16 -0
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +16 -0
- snowflake/ml/modeling/linear_model/gamma_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/huber_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/lars.py +16 -0
- snowflake/ml/modeling/linear_model/lars_cv.py +16 -0
- snowflake/ml/modeling/linear_model/lasso.py +16 -0
- snowflake/ml/modeling/linear_model/lasso_cv.py +16 -0
- snowflake/ml/modeling/linear_model/lasso_lars.py +16 -0
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +16 -0
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +16 -0
- snowflake/ml/modeling/linear_model/linear_regression.py +16 -0
- snowflake/ml/modeling/linear_model/logistic_regression.py +16 -0
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +16 -0
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +16 -0
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +16 -0
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +16 -0
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +16 -0
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +16 -0
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +16 -0
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/perceptron.py +16 -0
- snowflake/ml/modeling/linear_model/poisson_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/ransac_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/ridge.py +16 -0
- snowflake/ml/modeling/linear_model/ridge_classifier.py +16 -0
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +16 -0
- snowflake/ml/modeling/linear_model/ridge_cv.py +16 -0
- snowflake/ml/modeling/linear_model/sgd_classifier.py +16 -0
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +16 -0
- snowflake/ml/modeling/linear_model/sgd_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +16 -0
- snowflake/ml/modeling/manifold/isomap.py +16 -0
- snowflake/ml/modeling/manifold/mds.py +16 -0
- snowflake/ml/modeling/manifold/spectral_embedding.py +16 -0
- snowflake/ml/modeling/manifold/tsne.py +16 -0
- snowflake/ml/modeling/metrics/classification.py +5 -6
- snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
- snowflake/ml/modeling/metrics/ranking.py +7 -3
- snowflake/ml/modeling/metrics/regression.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +16 -0
- snowflake/ml/modeling/mixture/gaussian_mixture.py +16 -0
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +16 -0
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +16 -0
- snowflake/ml/modeling/multiclass/output_code_classifier.py +16 -0
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +16 -0
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +16 -0
- snowflake/ml/modeling/naive_bayes/complement_nb.py +16 -0
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +16 -0
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +16 -0
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +16 -0
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +16 -0
- snowflake/ml/modeling/neighbors/kernel_density.py +16 -0
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +16 -0
- snowflake/ml/modeling/neighbors/nearest_centroid.py +16 -0
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +16 -0
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +16 -0
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +16 -0
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +16 -0
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +16 -0
- snowflake/ml/modeling/neural_network/mlp_classifier.py +16 -0
- snowflake/ml/modeling/neural_network/mlp_regressor.py +16 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +16 -0
- snowflake/ml/modeling/semi_supervised/label_propagation.py +16 -0
- snowflake/ml/modeling/semi_supervised/label_spreading.py +16 -0
- snowflake/ml/modeling/svm/linear_svc.py +16 -0
- snowflake/ml/modeling/svm/linear_svr.py +16 -0
- snowflake/ml/modeling/svm/nu_svc.py +16 -0
- snowflake/ml/modeling/svm/nu_svr.py +16 -0
- snowflake/ml/modeling/svm/svc.py +16 -0
- snowflake/ml/modeling/svm/svr.py +16 -0
- snowflake/ml/modeling/tree/decision_tree_classifier.py +16 -0
- snowflake/ml/modeling/tree/decision_tree_regressor.py +16 -0
- snowflake/ml/modeling/tree/extra_tree_classifier.py +16 -0
- snowflake/ml/modeling/tree/extra_tree_regressor.py +16 -0
- snowflake/ml/modeling/xgboost/xgb_classifier.py +16 -0
- snowflake/ml/modeling/xgboost/xgb_regressor.py +16 -0
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +16 -0
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +16 -0
- snowflake/ml/registry/registry.py +2 -0
- snowflake/ml/version.py +1 -1
- snowflake_ml_python-1.2.1.dist-info/LICENSE.txt +202 -0
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/METADATA +261 -50
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/RECORD +189 -186
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/WHEEL +2 -1
- snowflake_ml_python-1.2.1.dist-info/top_level.txt +1 -0
snowflake/ml/modeling/mixture/gaussian_mixture.py
@@ -620,6 +620,22 @@ class GaussianMixture(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

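The hunk above is the substantive change of this release: the same 16 generated lines are stamped into every `snowflake/ml/modeling/*` wrapper listed with `+16 -0` in the file summary (only a subset of the hunks is rendered on this page). A minimal, self-contained sketch of the fallback it adds, assuming nothing about the package internals: `infer_expected_dtype` and `_SF_TYPE_NAMES` below are hypothetical stand-ins for the wrapper's inline logic and for its `convert_sp_to_sf_type` helper.

```python
from types import SimpleNamespace

# Hypothetical stand-in for the package's Snowpark-to-Snowflake type naming;
# the real code converts inferred Snowpark types via convert_sp_to_sf_type().
_SF_TYPE_NAMES = {float: "DOUBLE", int: "BIGINT", str: "VARCHAR"}

def infer_expected_dtype(sklearn_object, output_cols, input_col_types, expected_dtype=""):
    # The fallback only runs when the transform factory left the type blank.
    if expected_dtype != "":
        return expected_dtype
    # Clustering transformer whose output columns don't map 1:1 onto clusters:
    # each row holds a list of values, so the column type is ARRAY.
    if hasattr(sklearn_object, "n_clusters") and sklearn_object.n_clusters != len(output_cols):
        return "ARRAY"
    # Same reasoning for decomposition transformers and their components.
    if hasattr(sklearn_object, "n_components") and sklearn_object.n_components != len(output_cols):
        return "ARRAY"
    # Otherwise infer a scalar type from the inputs, but only when they are
    # homogeneous and the input/output column counts match.
    if (input_col_types
            and all(t == input_col_types[0] for t in input_col_types)
            and len(input_col_types) == len(output_cols)):
        return _SF_TYPE_NAMES.get(input_col_types[0], "VARIANT")
    return expected_dtype  # still blank: caller falls back to variant handling

# An 8-cluster transform with one output column -> "ARRAY".
print(infer_expected_dtype(SimpleNamespace(n_clusters=8), ["OUTPUT"], [float]))
# Homogeneous float inputs with matching column counts -> "DOUBLE".
print(infer_expected_dtype(object(), ["A", "B"], [float, float]))
```

The early returns mirror the diff's if/elif/else: mismatched clustering or decomposition column counts fall back to ARRAY, and a scalar type is inferred only from homogeneous, count-matched inputs.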
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py
@@ -532,6 +532,22 @@ class OneVsOneClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py
@@ -541,6 +541,22 @@ class OneVsRestClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/multiclass/output_code_classifier.py
@@ -544,6 +544,22 @@ class OutputCodeClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/naive_bayes/bernoulli_nb.py
@@ -544,6 +544,22 @@ class BernoulliNB(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/naive_bayes/categorical_nb.py
@@ -550,6 +550,22 @@ class CategoricalNB(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/naive_bayes/complement_nb.py
@@ -544,6 +544,22 @@ class ComplementNB(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/naive_bayes/gaussian_nb.py
@@ -525,6 +525,22 @@ class GaussianNB(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/naive_bayes/multinomial_nb.py
@@ -538,6 +538,22 @@ class MultinomialNB(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/neighbors/k_neighbors_classifier.py
@@ -595,6 +595,22 @@ class KNeighborsClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/neighbors/k_neighbors_regressor.py
@@ -597,6 +597,22 @@ class KNeighborsRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/neighbors/kernel_density.py
@@ -572,6 +572,22 @@ class KernelDensity(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/neighbors/local_outlier_factor.py
@@ -602,6 +602,22 @@ class LocalOutlierFactor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/neighbors/nearest_centroid.py
@@ -535,6 +535,22 @@ class NearestCentroid(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/neighbors/nearest_neighbors.py
@@ -583,6 +583,22 @@ class NearestNeighbors(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py
@@ -606,6 +606,22 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py
@@ -607,6 +607,22 @@ class RadiusNeighborsClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py
@@ -597,6 +597,22 @@ class RadiusNeighborsRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/neural_network/bernoulli_rbm.py
@@ -554,6 +554,22 @@ class BernoulliRBM(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/neural_network/mlp_classifier.py
@@ -709,6 +709,22 @@ class MLPClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/neural_network/mlp_regressor.py
@@ -705,6 +705,22 @@ class MLPRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/preprocessing/polynomial_features.py
@@ -544,6 +544,22 @@ class PolynomialFeatures(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/semi_supervised/label_propagation.py
@@ -550,6 +550,22 @@ class LabelPropagation(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

snowflake/ml/modeling/semi_supervised/label_spreading.py
@@ -559,6 +559,22 @@ class LabelSpreading(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",

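For a sense of which estimators hit the two new guard branches, a quick check against stock scikit-learn attributes (this is just an illustration; nothing below comes from snowflake-ml-python itself):

```python
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# KMeans carries n_clusters: with 8 clusters but a single output column the
# counts differ, so the new fallback would settle on "ARRAY".
km = KMeans(n_clusters=8)
print(hasattr(km, "n_clusters"), km.n_clusters != len(["OUTPUT"]))            # True True

# PCA carries n_components: with two components and two output columns the
# counts match, so the fallback would go on to infer a type from the inputs.
pca = PCA(n_components=2)
print(hasattr(pca, "n_components"), pca.n_components != len(["PC1", "PC2"]))  # True False
```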