snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +16 -13
- snowflake/ml/_internal/exceptions/modeling_error_messages.py +5 -1
- snowflake/ml/_internal/telemetry.py +19 -0
- snowflake/ml/feature_store/__init__.py +9 -0
- snowflake/ml/feature_store/entity.py +73 -0
- snowflake/ml/feature_store/feature_store.py +1657 -0
- snowflake/ml/feature_store/feature_view.py +459 -0
- snowflake/ml/model/_client/ops/model_ops.py +16 -38
- snowflake/ml/model/_client/sql/model.py +1 -7
- snowflake/ml/model/_client/sql/model_version.py +20 -15
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +9 -1
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +12 -2
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +7 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
- snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
- snowflake/ml/model/model_signature.py +72 -16
- snowflake/ml/model/type_hints.py +12 -0
- snowflake/ml/modeling/_internal/estimator_protocols.py +1 -41
- snowflake/ml/modeling/_internal/model_trainer_builder.py +13 -9
- snowflake/ml/modeling/_internal/{distributed_hpo_trainer.py → snowpark_implementations/distributed_hpo_trainer.py} +66 -96
- snowflake/ml/modeling/_internal/{snowpark_handlers.py → snowpark_implementations/snowpark_handlers.py} +9 -6
- snowflake/ml/modeling/_internal/{xgboost_external_memory_trainer.py → snowpark_implementations/xgboost_external_memory_trainer.py} +3 -1
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +19 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +19 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +19 -3
- snowflake/ml/modeling/cluster/birch.py +19 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +19 -3
- snowflake/ml/modeling/cluster/dbscan.py +19 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +19 -3
- snowflake/ml/modeling/cluster/k_means.py +19 -3
- snowflake/ml/modeling/cluster/mean_shift.py +19 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +19 -3
- snowflake/ml/modeling/cluster/optics.py +19 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +19 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +19 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +19 -3
- snowflake/ml/modeling/compose/column_transformer.py +19 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +19 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +19 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +19 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +19 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +19 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +19 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +19 -3
- snowflake/ml/modeling/covariance/oas.py +19 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +19 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +19 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +19 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +19 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +19 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +19 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +19 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +19 -3
- snowflake/ml/modeling/decomposition/pca.py +19 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +19 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +19 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +19 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +19 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +19 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +19 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +19 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +19 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +19 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +19 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +19 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +19 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +19 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +19 -3
- snowflake/ml/modeling/impute/knn_imputer.py +19 -3
- snowflake/ml/modeling/impute/missing_indicator.py +19 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +19 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +19 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +19 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +19 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +19 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +19 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +19 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +19 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +19 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +19 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +19 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/lars.py +19 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +19 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +19 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +19 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +19 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +19 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/perceptron.py +19 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ridge.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +19 -3
- snowflake/ml/modeling/manifold/isomap.py +19 -3
- snowflake/ml/modeling/manifold/mds.py +19 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +19 -3
- snowflake/ml/modeling/manifold/tsne.py +19 -3
- snowflake/ml/modeling/metrics/classification.py +5 -6
- snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
- snowflake/ml/modeling/metrics/ranking.py +7 -3
- snowflake/ml/modeling/metrics/regression.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +19 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +19 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +3 -13
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +3 -13
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +19 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +19 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +19 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +19 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +19 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +19 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +19 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +19 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +19 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +19 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +19 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +19 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +19 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +19 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +19 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +19 -3
- snowflake/ml/modeling/preprocessing/polynomial_features.py +19 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +19 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +19 -3
- snowflake/ml/modeling/svm/linear_svc.py +19 -3
- snowflake/ml/modeling/svm/linear_svr.py +19 -3
- snowflake/ml/modeling/svm/nu_svc.py +19 -3
- snowflake/ml/modeling/svm/nu_svr.py +19 -3
- snowflake/ml/modeling/svm/svc.py +19 -3
- snowflake/ml/modeling/svm/svr.py +19 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +19 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +19 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +19 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +19 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +19 -3
- snowflake/ml/modeling/xgboost/xgb_regressor.py +19 -3
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +19 -3
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +19 -3
- snowflake/ml/registry/registry.py +2 -0
- snowflake/ml/version.py +1 -1
- snowflake_ml_python-1.2.2.dist-info/LICENSE.txt +202 -0
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/METADATA +276 -50
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/RECORD +204 -197
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/WHEEL +2 -1
- snowflake_ml_python-1.2.2.dist-info/top_level.txt +1 -0
- /snowflake/ml/modeling/_internal/{pandas_trainer.py → local_implementations/pandas_trainer.py} +0 -0
- /snowflake/ml/modeling/_internal/{snowpark_trainer.py → snowpark_implementations/snowpark_trainer.py} +0 -0
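The headline addition in this release is the new snowflake.ml.feature_store package (entity.py, feature_view.py, and feature_store.py in the list above). A minimal quick-start sketch follows; the class names come straight from the files added in this diff, but every constructor argument and method shown is an assumption based on the public documentation, not verified against this exact release:

from snowflake.snowpark import Session
from snowflake.ml.feature_store import CreationMode, Entity, FeatureStore, FeatureView

# Hypothetical connection parameters; replace with real credentials.
connection_parameters = {"account": "...", "user": "...", "password": "..."}
session = Session.builder.configs(connection_parameters).create()

# Create (or open) a feature store backed by a schema in ML_DB.
fs = FeatureStore(
    session=session,
    database="ML_DB",
    name="MY_FEATURE_STORE",
    default_warehouse="ML_WH",
    creation_mode=CreationMode.CREATE_IF_NOT_EXIST,
)

# An Entity names the join keys that feature views are keyed on.
customer = Entity(name="CUSTOMER", join_keys=["CUSTOMER_ID"])
fs.register_entity(customer)

# A FeatureView wraps a Snowpark DataFrame of features for that entity.
fv = FeatureView(
    name="CUSTOMER_FEATURES",
    entities=[customer],
    feature_df=session.table("RAW_CUSTOMERS").select("CUSTOMER_ID", "LTV", "CHURN_SCORE"),
    refresh_freq="1 day",  # materialize and refresh daily
)
fs.register_feature_view(feature_view=fv, version="V1")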
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -353,7 +353,7 @@ class GradientBoostingClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=GradientBoostingClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -713,6 +713,22 @@ class GradientBoostingClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
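Every estimator section in this diff carries the same 16-line addition to transform(): when the factory left expected_dtype empty, the type is now inferred at transform time. The decision logic, distilled into a standalone sketch (a hypothetical helper, not the library's API; the estimator attributes are passed explicitly instead of being read off self._sklearn_object):

from typing import List, Optional

def infer_expected_dtype(
    output_cols: List[str],
    input_types: List[str],
    n_clusters: Optional[int] = None,
    n_components: Optional[int] = None,
) -> str:
    # Clustering/decomposition transforms whose configured width differs from
    # the number of output columns emit one array per row.
    if n_clusters is not None and n_clusters != len(output_cols):
        return "ARRAY"
    if n_components is not None and n_components != len(output_cols):
        return "ARRAY"
    # A scalar type is only safe when all input types agree and the column
    # counts line up; otherwise leave it unresolved so the caller falls back
    # to variant/ARRAY handling, as the comments in the diff explain.
    if input_types and all(t == input_types[0] for t in input_types) and len(input_types) == len(output_cols):
        return input_types[0]
    return ""

# A PCA-style reduction to 2 components exposed as one output column -> ARRAY.
assert infer_expected_dtype(["EMBEDDING"], ["FLOAT", "FLOAT"], n_components=2) == "ARRAY"
# Same-typed inputs with matching width -> the scalar type can be propagated.
assert infer_expected_dtype(["X1", "X2"], ["FLOAT", "FLOAT"]) == "FLOAT"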
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -362,7 +362,7 @@ class GradientBoostingRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=GradientBoostingRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -722,6 +722,22 @@ class GradientBoostingRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
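One detail worth noting in the _handlers hunks: the attribute is now annotated with the TransformerHandlers protocol while the assigned object remains the concrete SnowparkHandlers (aliased HandlersImpl). A minimal sketch of that structural-typing pattern, with hypothetical names standing in for the real protocol's methods, which this diff does not show:

from typing import Protocol

class BatchInferenceHandler(Protocol):
    # Hypothetical stand-in for TransformerHandlers; the actual method set of
    # the real protocol is not visible in this diff.
    def batch_inference(self, dataset: list, inference_method: str) -> list: ...

class SnowparkBatchInference:
    # Plays the role of SnowparkHandlers/HandlersImpl: it does not inherit from
    # the protocol, it simply has a structurally matching method.
    def batch_inference(self, dataset: list, inference_method: str) -> list:
        print(f"running {inference_method} over {len(dataset)} rows")
        return dataset

# The annotation documents the contract; any structurally matching class works.
handlers: BatchInferenceHandler = SnowparkBatchInference()
handlers.batch_inference(dataset=[1, 2, 3], inference_method="transform")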
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -334,7 +334,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=HistGradientBoostingClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -694,6 +694,22 @@ class HistGradientBoostingClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
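The snowpark_handlers relocation (snowflake/ml/modeling/_internal to its new snowpark_implementations subpackage) repeats in every import hunk here. Code that imported the private module path directly, against best practice, can bridge both layouts with a shim; both paths below are taken verbatim from the hunks above:

try:  # new layout (present in 1.2.2)
    from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import (
        SnowparkHandlers,
    )
except ImportError:  # old layout (1.2.0)
    from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers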
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -325,7 +325,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=HistGradientBoostingRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -685,6 +685,22 @@ class HistGradientBoostingRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/ensemble/isolation_forest.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -225,7 +225,7 @@ class IsolationForest(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=IsolationForest.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -585,6 +585,22 @@ class IsolationForest(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/ensemble/random_forest_classifier.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -337,7 +337,7 @@ class RandomForestClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=RandomForestClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -697,6 +697,22 @@ class RandomForestClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/ensemble/random_forest_regressor.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -316,7 +316,7 @@ class RandomForestRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=RandomForestRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -676,6 +676,22 @@ class RandomForestRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/ensemble/stacking_regressor.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -217,7 +217,7 @@ class StackingRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=StackingRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -579,6 +579,22 @@ class StackingRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/ensemble/voting_classifier.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -199,7 +199,7 @@ class VotingClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=VotingClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -561,6 +561,22 @@ class VotingClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/ensemble/voting_regressor.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -181,7 +181,7 @@ class VotingRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=VotingRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -543,6 +543,22 @@ class VotingRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/feature_selection/generic_univariate_select.py

@@ -27,7 +27,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -36,7 +36,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -171,7 +171,7 @@ class GenericUnivariateSelect(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=GenericUnivariateSelect.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -531,6 +531,22 @@ class GenericUnivariateSelect(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/feature_selection/select_fdr.py

@@ -27,7 +27,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -36,7 +36,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -167,7 +167,7 @@ class SelectFdr(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SelectFdr.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -527,6 +527,22 @@ class SelectFdr(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",