snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- snowflake/ml/_internal/telemetry.py +19 -0
- snowflake/ml/model/_client/ops/model_ops.py +16 -38
- snowflake/ml/model/_client/sql/model.py +1 -7
- snowflake/ml/model/_client/sql/model_version.py +20 -15
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
- snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
- snowflake/ml/model/type_hints.py +3 -0
- snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +63 -95
- snowflake/ml/modeling/_internal/snowpark_handlers.py +9 -6
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +16 -0
- snowflake/ml/modeling/cluster/affinity_propagation.py +16 -0
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +16 -0
- snowflake/ml/modeling/cluster/birch.py +16 -0
- snowflake/ml/modeling/cluster/bisecting_k_means.py +16 -0
- snowflake/ml/modeling/cluster/dbscan.py +16 -0
- snowflake/ml/modeling/cluster/feature_agglomeration.py +16 -0
- snowflake/ml/modeling/cluster/k_means.py +16 -0
- snowflake/ml/modeling/cluster/mean_shift.py +16 -0
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +16 -0
- snowflake/ml/modeling/cluster/optics.py +16 -0
- snowflake/ml/modeling/cluster/spectral_biclustering.py +16 -0
- snowflake/ml/modeling/cluster/spectral_clustering.py +16 -0
- snowflake/ml/modeling/cluster/spectral_coclustering.py +16 -0
- snowflake/ml/modeling/compose/column_transformer.py +16 -0
- snowflake/ml/modeling/compose/transformed_target_regressor.py +16 -0
- snowflake/ml/modeling/covariance/elliptic_envelope.py +16 -0
- snowflake/ml/modeling/covariance/empirical_covariance.py +16 -0
- snowflake/ml/modeling/covariance/graphical_lasso.py +16 -0
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +16 -0
- snowflake/ml/modeling/covariance/ledoit_wolf.py +16 -0
- snowflake/ml/modeling/covariance/min_cov_det.py +16 -0
- snowflake/ml/modeling/covariance/oas.py +16 -0
- snowflake/ml/modeling/covariance/shrunk_covariance.py +16 -0
- snowflake/ml/modeling/decomposition/dictionary_learning.py +16 -0
- snowflake/ml/modeling/decomposition/factor_analysis.py +16 -0
- snowflake/ml/modeling/decomposition/fast_ica.py +16 -0
- snowflake/ml/modeling/decomposition/incremental_pca.py +16 -0
- snowflake/ml/modeling/decomposition/kernel_pca.py +16 -0
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +16 -0
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +16 -0
- snowflake/ml/modeling/decomposition/pca.py +16 -0
- snowflake/ml/modeling/decomposition/sparse_pca.py +16 -0
- snowflake/ml/modeling/decomposition/truncated_svd.py +16 -0
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +16 -0
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +16 -0
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/bagging_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/bagging_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/isolation_forest.py +16 -0
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/stacking_regressor.py +16 -0
- snowflake/ml/modeling/ensemble/voting_classifier.py +16 -0
- snowflake/ml/modeling/ensemble/voting_regressor.py +16 -0
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +16 -0
- snowflake/ml/modeling/feature_selection/select_fdr.py +16 -0
- snowflake/ml/modeling/feature_selection/select_fpr.py +16 -0
- snowflake/ml/modeling/feature_selection/select_fwe.py +16 -0
- snowflake/ml/modeling/feature_selection/select_k_best.py +16 -0
- snowflake/ml/modeling/feature_selection/select_percentile.py +16 -0
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +16 -0
- snowflake/ml/modeling/feature_selection/variance_threshold.py +16 -0
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +16 -0
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +16 -0
- snowflake/ml/modeling/impute/iterative_imputer.py +16 -0
- snowflake/ml/modeling/impute/knn_imputer.py +16 -0
- snowflake/ml/modeling/impute/missing_indicator.py +16 -0
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +16 -0
- snowflake/ml/modeling/kernel_approximation/nystroem.py +16 -0
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +16 -0
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +16 -0
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +16 -0
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +16 -0
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +16 -0
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/ard_regression.py +16 -0
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +16 -0
- snowflake/ml/modeling/linear_model/elastic_net.py +16 -0
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +16 -0
- snowflake/ml/modeling/linear_model/gamma_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/huber_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/lars.py +16 -0
- snowflake/ml/modeling/linear_model/lars_cv.py +16 -0
- snowflake/ml/modeling/linear_model/lasso.py +16 -0
- snowflake/ml/modeling/linear_model/lasso_cv.py +16 -0
- snowflake/ml/modeling/linear_model/lasso_lars.py +16 -0
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +16 -0
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +16 -0
- snowflake/ml/modeling/linear_model/linear_regression.py +16 -0
- snowflake/ml/modeling/linear_model/logistic_regression.py +16 -0
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +16 -0
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +16 -0
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +16 -0
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +16 -0
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +16 -0
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +16 -0
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +16 -0
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/perceptron.py +16 -0
- snowflake/ml/modeling/linear_model/poisson_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/ransac_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/ridge.py +16 -0
- snowflake/ml/modeling/linear_model/ridge_classifier.py +16 -0
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +16 -0
- snowflake/ml/modeling/linear_model/ridge_cv.py +16 -0
- snowflake/ml/modeling/linear_model/sgd_classifier.py +16 -0
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +16 -0
- snowflake/ml/modeling/linear_model/sgd_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +16 -0
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +16 -0
- snowflake/ml/modeling/manifold/isomap.py +16 -0
- snowflake/ml/modeling/manifold/mds.py +16 -0
- snowflake/ml/modeling/manifold/spectral_embedding.py +16 -0
- snowflake/ml/modeling/manifold/tsne.py +16 -0
- snowflake/ml/modeling/metrics/classification.py +5 -6
- snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
- snowflake/ml/modeling/metrics/ranking.py +7 -3
- snowflake/ml/modeling/metrics/regression.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +16 -0
- snowflake/ml/modeling/mixture/gaussian_mixture.py +16 -0
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +16 -0
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +16 -0
- snowflake/ml/modeling/multiclass/output_code_classifier.py +16 -0
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +16 -0
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +16 -0
- snowflake/ml/modeling/naive_bayes/complement_nb.py +16 -0
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +16 -0
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +16 -0
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +16 -0
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +16 -0
- snowflake/ml/modeling/neighbors/kernel_density.py +16 -0
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +16 -0
- snowflake/ml/modeling/neighbors/nearest_centroid.py +16 -0
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +16 -0
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +16 -0
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +16 -0
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +16 -0
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +16 -0
- snowflake/ml/modeling/neural_network/mlp_classifier.py +16 -0
- snowflake/ml/modeling/neural_network/mlp_regressor.py +16 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +16 -0
- snowflake/ml/modeling/semi_supervised/label_propagation.py +16 -0
- snowflake/ml/modeling/semi_supervised/label_spreading.py +16 -0
- snowflake/ml/modeling/svm/linear_svc.py +16 -0
- snowflake/ml/modeling/svm/linear_svr.py +16 -0
- snowflake/ml/modeling/svm/nu_svc.py +16 -0
- snowflake/ml/modeling/svm/nu_svr.py +16 -0
- snowflake/ml/modeling/svm/svc.py +16 -0
- snowflake/ml/modeling/svm/svr.py +16 -0
- snowflake/ml/modeling/tree/decision_tree_classifier.py +16 -0
- snowflake/ml/modeling/tree/decision_tree_regressor.py +16 -0
- snowflake/ml/modeling/tree/extra_tree_classifier.py +16 -0
- snowflake/ml/modeling/tree/extra_tree_regressor.py +16 -0
- snowflake/ml/modeling/xgboost/xgb_classifier.py +16 -0
- snowflake/ml/modeling/xgboost/xgb_regressor.py +16 -0
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +16 -0
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +16 -0
- snowflake/ml/registry/registry.py +2 -0
- snowflake/ml/version.py +1 -1
- snowflake_ml_python-1.2.1.dist-info/LICENSE.txt +202 -0
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/METADATA +261 -50
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/RECORD +189 -186
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/WHEEL +2 -1
- snowflake_ml_python-1.2.1.dist-info/top_level.txt +1 -0
snowflake/ml/modeling/_internal/snowpark_handlers.py

@@ -306,7 +306,7 @@ class SnowparkHandlers:
        input_cols: List[str],
        label_cols: List[str],
        sample_weight_col: Optional[str],
-
+        score_statement_params: Dict[str, str],
    ) -> float:
        import inspect
        import os
@@ -317,13 +317,13 @@ class SnowparkHandlers:
        importlib.import_module(import_name)

        for query in sql_queries[:-1]:
-            _ = session.sql(query).collect(statement_params=
+            _ = session.sql(query).collect(statement_params=score_statement_params)
        sp_df = session.sql(sql_queries[-1])
-        df: pd.DataFrame = sp_df.to_pandas(statement_params=
+        df: pd.DataFrame = sp_df.to_pandas(statement_params=score_statement_params)
        df.columns = sp_df.columns

        local_score_file_name = get_temp_file_path()
-        session.file.get(stage_score_file_name, local_score_file_name, statement_params=
+        session.file.get(stage_score_file_name, local_score_file_name, statement_params=score_statement_params)

        local_score_file_name_path = os.path.join(local_score_file_name, os.listdir(local_score_file_name)[0])
        with open(local_score_file_name_path, mode="r+b") as local_score_file_obj:
@@ -348,7 +348,7 @@ class SnowparkHandlers:
            return result

        # Call score sproc
-
+        score_statement_params = telemetry.get_function_usage_statement_params(
            project=_PROJECT,
            subproject=self._subproject,
            function_name=telemetry.get_statement_params_full_func_name(
@@ -357,6 +357,8 @@ class SnowparkHandlers:
            api_calls=[Session.call],
            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
        )
+
+        kwargs = telemetry.get_sproc_statement_params_kwargs(score_wrapper_sproc, score_statement_params)
        score: float = score_wrapper_sproc(
            session,
            queries,
@@ -364,7 +366,8 @@ class SnowparkHandlers:
            input_cols,
            label_cols,
            sample_weight_col,
-
+            score_statement_params,
+            **kwargs,
        )

        cleanup_temp_files([local_score_file_name])
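The score path in snowpark_handlers.py now builds a dedicated `score_statement_params` dictionary from the telemetry helpers and threads it through every Snowpark call made while scoring: the setup queries, the pandas conversion, the stage download, and (via `get_sproc_statement_params_kwargs`) the stored-procedure call itself, so all of these queries land in one usage-tracking context. The sketch below shows only the generic Snowpark pattern of passing `statement_params` through such calls; it assumes an open Snowpark `Session`, and the function name and tag value are illustrative rather than part of the package.

```python
from typing import Dict, List

import pandas as pd
from snowflake.snowpark import Session


def run_scoring_queries(
    session: Session,
    sql_queries: List[str],
    score_statement_params: Dict[str, str],
) -> pd.DataFrame:
    # Run every setup query under the same statement params so they are
    # grouped into a single telemetry context on the Snowflake side.
    for query in sql_queries[:-1]:
        _ = session.sql(query).collect(statement_params=score_statement_params)

    # The final query produces the scoring input; fetch it locally.
    sp_df = session.sql(sql_queries[-1])
    df: pd.DataFrame = sp_df.to_pandas(statement_params=score_statement_params)
    df.columns = sp_df.columns
    return df


# Illustrative params; the package builds the real ones with
# telemetry.get_function_usage_statement_params(...).
example_params = {"QUERY_TAG": "snowflake-ml-python score()"}
```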
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py

@@ -589,6 +589,22 @@ class CalibratedClassifierCV(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",
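These sixteen added lines are the same in every autogenerated estimator touched by this release; the hunks that follow repeat the block verbatim for each class, differing only in line numbers. The logic: when the factory left `expected_dtype` empty, a clustering or decomposition transformer whose output width does not match `n_clusters` / `n_components` falls back to `ARRAY`; otherwise the Snowflake type is inferred from the input columns, but only when they all share one type and the transform preserves the column count. Below is a minimal, package-independent sketch of that decision, using plain string type names in place of the package's `_infer_signature` and `convert_sp_to_sf_type` helpers; the function name is illustrative.

```python
from typing import List, Sequence


def infer_expected_dtype(
    sklearn_object: object,
    output_cols: Sequence[str],
    input_col_types: List[str],
) -> str:
    """Return a single Snowflake type name, 'ARRAY', or '' when nothing can be inferred."""
    expected_dtype = ""

    # Clustering transformers: if the output width differs from n_clusters,
    # each row carries a list of values, so the column must be an ARRAY.
    if hasattr(sklearn_object, "n_clusters") and getattr(sklearn_object, "n_clusters") != len(output_cols):
        expected_dtype = "ARRAY"
    # Decomposition transformers: same reasoning with n_components.
    elif hasattr(sklearn_object, "n_components") and getattr(sklearn_object, "n_components") != len(output_cols):
        expected_dtype = "ARRAY"
    else:
        # Infer from the inputs only when they share one type and the transform
        # keeps the column count; otherwise leave "" (the variant fallback).
        if input_col_types and all(t == input_col_types[0] for t in input_col_types) and len(input_col_types) == len(output_cols):
            expected_dtype = input_col_types[0]
    return expected_dtype


class _FakePCA:
    n_components = 2


# Two output columns match n_components and the inputs agree -> "DOUBLE".
print(infer_expected_dtype(_FakePCA(), ["PC1", "PC2"], ["DOUBLE", "DOUBLE"]))
```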
snowflake/ml/modeling/cluster/affinity_propagation.py

@@ -564,6 +564,22 @@ class AffinityPropagation(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/cluster/agglomerative_clustering.py

@@ -595,6 +595,22 @@ class AgglomerativeClustering(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/cluster/birch.py

@@ -557,6 +557,22 @@ class Birch(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/cluster/bisecting_k_means.py

@@ -606,6 +606,22 @@ class BisectingKMeans(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/cluster/dbscan.py

@@ -570,6 +570,22 @@ class DBSCAN(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/cluster/feature_agglomeration.py

@@ -604,6 +604,22 @@ class FeatureAgglomeration(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/cluster/k_means.py

@@ -601,6 +601,22 @@ class KMeans(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/cluster/mean_shift.py

@@ -575,6 +575,22 @@ class MeanShift(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/cluster/mini_batch_k_means.py

@@ -627,6 +627,22 @@ class MiniBatchKMeans(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/cluster/optics.py

@@ -643,6 +643,22 @@ class OPTICS(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/cluster/spectral_biclustering.py

@@ -581,6 +581,22 @@ class SpectralBiclustering(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/cluster/spectral_clustering.py

@@ -639,6 +639,22 @@ class SpectralClustering(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/cluster/spectral_coclustering.py

@@ -560,6 +560,22 @@ class SpectralCoclustering(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/compose/column_transformer.py

@@ -592,6 +592,22 @@ class ColumnTransformer(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/compose/transformed_target_regressor.py

@@ -553,6 +553,22 @@ class TransformedTargetRegressor(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/covariance/elliptic_envelope.py

@@ -548,6 +548,22 @@ class EllipticEnvelope(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/covariance/empirical_covariance.py

@@ -522,6 +522,22 @@ class EmpiricalCovariance(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/covariance/graphical_lasso.py

@@ -570,6 +570,22 @@ class GraphicalLasso(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/covariance/graphical_lasso_cv.py

@@ -596,6 +596,22 @@ class GraphicalLassoCV(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/covariance/ledoit_wolf.py

@@ -529,6 +529,22 @@ class LedoitWolf(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",

snowflake/ml/modeling/covariance/min_cov_det.py

@@ -541,6 +541,22 @@ class MinCovDet(BaseTransformer):
        # each row containing a list of values.
        expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
        output_df = self._batch_inference(
            dataset=dataset,
            inference_method="transform",