snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +16 -13
- snowflake/ml/_internal/exceptions/modeling_error_messages.py +5 -1
- snowflake/ml/_internal/telemetry.py +19 -0
- snowflake/ml/feature_store/__init__.py +9 -0
- snowflake/ml/feature_store/entity.py +73 -0
- snowflake/ml/feature_store/feature_store.py +1657 -0
- snowflake/ml/feature_store/feature_view.py +459 -0
- snowflake/ml/model/_client/ops/model_ops.py +16 -38
- snowflake/ml/model/_client/sql/model.py +1 -7
- snowflake/ml/model/_client/sql/model_version.py +20 -15
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +9 -1
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +12 -2
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +7 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
- snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
- snowflake/ml/model/model_signature.py +72 -16
- snowflake/ml/model/type_hints.py +12 -0
- snowflake/ml/modeling/_internal/estimator_protocols.py +1 -41
- snowflake/ml/modeling/_internal/model_trainer_builder.py +13 -9
- snowflake/ml/modeling/_internal/{distributed_hpo_trainer.py → snowpark_implementations/distributed_hpo_trainer.py} +66 -96
- snowflake/ml/modeling/_internal/{snowpark_handlers.py → snowpark_implementations/snowpark_handlers.py} +9 -6
- snowflake/ml/modeling/_internal/{xgboost_external_memory_trainer.py → snowpark_implementations/xgboost_external_memory_trainer.py} +3 -1
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +19 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +19 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +19 -3
- snowflake/ml/modeling/cluster/birch.py +19 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +19 -3
- snowflake/ml/modeling/cluster/dbscan.py +19 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +19 -3
- snowflake/ml/modeling/cluster/k_means.py +19 -3
- snowflake/ml/modeling/cluster/mean_shift.py +19 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +19 -3
- snowflake/ml/modeling/cluster/optics.py +19 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +19 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +19 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +19 -3
- snowflake/ml/modeling/compose/column_transformer.py +19 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +19 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +19 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +19 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +19 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +19 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +19 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +19 -3
- snowflake/ml/modeling/covariance/oas.py +19 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +19 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +19 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +19 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +19 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +19 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +19 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +19 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +19 -3
- snowflake/ml/modeling/decomposition/pca.py +19 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +19 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +19 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +19 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +19 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +19 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +19 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +19 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +19 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +19 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +19 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +19 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +19 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +19 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +19 -3
- snowflake/ml/modeling/impute/knn_imputer.py +19 -3
- snowflake/ml/modeling/impute/missing_indicator.py +19 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +19 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +19 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +19 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +19 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +19 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +19 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +19 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +19 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +19 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +19 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +19 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/lars.py +19 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +19 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +19 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +19 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +19 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +19 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/perceptron.py +19 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ridge.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +19 -3
- snowflake/ml/modeling/manifold/isomap.py +19 -3
- snowflake/ml/modeling/manifold/mds.py +19 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +19 -3
- snowflake/ml/modeling/manifold/tsne.py +19 -3
- snowflake/ml/modeling/metrics/classification.py +5 -6
- snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
- snowflake/ml/modeling/metrics/ranking.py +7 -3
- snowflake/ml/modeling/metrics/regression.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +19 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +19 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +3 -13
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +3 -13
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +19 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +19 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +19 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +19 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +19 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +19 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +19 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +19 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +19 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +19 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +19 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +19 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +19 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +19 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +19 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +19 -3
- snowflake/ml/modeling/preprocessing/polynomial_features.py +19 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +19 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +19 -3
- snowflake/ml/modeling/svm/linear_svc.py +19 -3
- snowflake/ml/modeling/svm/linear_svr.py +19 -3
- snowflake/ml/modeling/svm/nu_svc.py +19 -3
- snowflake/ml/modeling/svm/nu_svr.py +19 -3
- snowflake/ml/modeling/svm/svc.py +19 -3
- snowflake/ml/modeling/svm/svr.py +19 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +19 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +19 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +19 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +19 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +19 -3
- snowflake/ml/modeling/xgboost/xgb_regressor.py +19 -3
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +19 -3
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +19 -3
- snowflake/ml/registry/registry.py +2 -0
- snowflake/ml/version.py +1 -1
- snowflake_ml_python-1.2.2.dist-info/LICENSE.txt +202 -0
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/METADATA +276 -50
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/RECORD +204 -197
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/WHEEL +2 -1
- snowflake_ml_python-1.2.2.dist-info/top_level.txt +1 -0
- /snowflake/ml/modeling/_internal/{pandas_trainer.py → local_implementations/pandas_trainer.py} +0 -0
- /snowflake/ml/modeling/_internal/{snowpark_trainer.py → snowpark_implementations/snowpark_trainer.py} +0 -0
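Note: the `_internal` trainer and handler modules above were moved into `local_implementations`/`snowpark_implementations` subpackages in 1.2.2. These are private paths, but downstream code that imported them directly can bridge the rename with a shim along these lines (a hedged sketch, not part of the package; the try/except layout is the only assumption):

```python
# Hypothetical compatibility shim for the module moves listed above.
# Not part of snowflake-ml-python.
try:
    # 1.2.2+ layout
    from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import (
        SnowparkHandlers,
    )
except ImportError:
    # 1.2.0 layout
    from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers
```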
snowflake/ml/modeling/feature_selection/select_fpr.py

```diff
@@ -27,7 +27,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -36,7 +36,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -167,7 +167,7 @@ class SelectFpr(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SelectFpr.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -527,6 +527,22 @@ class SelectFpr(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
```
snowflake/ml/modeling/feature_selection/select_fwe.py

```diff
@@ -27,7 +27,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -36,7 +36,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -167,7 +167,7 @@ class SelectFwe(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SelectFwe.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -527,6 +527,22 @@ class SelectFwe(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
```
snowflake/ml/modeling/feature_selection/select_k_best.py

```diff
@@ -27,7 +27,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -36,7 +36,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -168,7 +168,7 @@ class SelectKBest(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SelectKBest.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -528,6 +528,22 @@ class SelectKBest(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
```
snowflake/ml/modeling/feature_selection/select_percentile.py

```diff
@@ -27,7 +27,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -36,7 +36,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -167,7 +167,7 @@ class SelectPercentile(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SelectPercentile.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -527,6 +527,22 @@ class SelectPercentile(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
```
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py

```diff
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -225,7 +225,7 @@ class SequentialFeatureSelector(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SequentialFeatureSelector.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -585,6 +585,22 @@ class SequentialFeatureSelector(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
```
snowflake/ml/modeling/feature_selection/variance_threshold.py

```diff
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -158,7 +158,7 @@ class VarianceThreshold(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=VarianceThreshold.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -518,6 +518,22 @@ class VarianceThreshold(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
```
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py

```diff
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -253,7 +253,7 @@ class GaussianProcessClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=GaussianProcessClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -613,6 +613,22 @@ class GaussianProcessClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
```
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py

```diff
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -244,7 +244,7 @@ class GaussianProcessRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=GaussianProcessRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -604,6 +604,22 @@ class GaussianProcessRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
```
snowflake/ml/modeling/impute/iterative_imputer.py

```diff
@@ -27,7 +27,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -36,7 +36,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -286,7 +286,7 @@ class IterativeImputer(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=IterativeImputer.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -646,6 +646,22 @@ class IterativeImputer(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
```
snowflake/ml/modeling/impute/knn_imputer.py

```diff
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -212,7 +212,7 @@ class KNNImputer(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=KNNImputer.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -572,6 +572,22 @@ class KNNImputer(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
```
snowflake/ml/modeling/impute/missing_indicator.py

```diff
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -186,7 +186,7 @@ class MissingIndicator(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MissingIndicator.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -546,6 +546,22 @@ class MissingIndicator(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
```
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py

```diff
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -161,7 +161,7 @@ class AdditiveChi2Sampler(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=AdditiveChi2Sampler.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -521,6 +521,22 @@ class AdditiveChi2Sampler(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
```