snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +16 -13
- snowflake/ml/_internal/exceptions/modeling_error_messages.py +5 -1
- snowflake/ml/_internal/telemetry.py +19 -0
- snowflake/ml/feature_store/__init__.py +9 -0
- snowflake/ml/feature_store/entity.py +73 -0
- snowflake/ml/feature_store/feature_store.py +1657 -0
- snowflake/ml/feature_store/feature_view.py +459 -0
- snowflake/ml/model/_client/ops/model_ops.py +16 -38
- snowflake/ml/model/_client/sql/model.py +1 -7
- snowflake/ml/model/_client/sql/model_version.py +20 -15
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +9 -1
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +12 -2
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +7 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
- snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
- snowflake/ml/model/model_signature.py +72 -16
- snowflake/ml/model/type_hints.py +12 -0
- snowflake/ml/modeling/_internal/estimator_protocols.py +1 -41
- snowflake/ml/modeling/_internal/model_trainer_builder.py +13 -9
- snowflake/ml/modeling/_internal/{distributed_hpo_trainer.py → snowpark_implementations/distributed_hpo_trainer.py} +66 -96
- snowflake/ml/modeling/_internal/{snowpark_handlers.py → snowpark_implementations/snowpark_handlers.py} +9 -6
- snowflake/ml/modeling/_internal/{xgboost_external_memory_trainer.py → snowpark_implementations/xgboost_external_memory_trainer.py} +3 -1
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +19 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +19 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +19 -3
- snowflake/ml/modeling/cluster/birch.py +19 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +19 -3
- snowflake/ml/modeling/cluster/dbscan.py +19 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +19 -3
- snowflake/ml/modeling/cluster/k_means.py +19 -3
- snowflake/ml/modeling/cluster/mean_shift.py +19 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +19 -3
- snowflake/ml/modeling/cluster/optics.py +19 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +19 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +19 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +19 -3
- snowflake/ml/modeling/compose/column_transformer.py +19 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +19 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +19 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +19 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +19 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +19 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +19 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +19 -3
- snowflake/ml/modeling/covariance/oas.py +19 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +19 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +19 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +19 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +19 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +19 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +19 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +19 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +19 -3
- snowflake/ml/modeling/decomposition/pca.py +19 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +19 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +19 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +19 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +19 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +19 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +19 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +19 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +19 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +19 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +19 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +19 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +19 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +19 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +19 -3
- snowflake/ml/modeling/impute/knn_imputer.py +19 -3
- snowflake/ml/modeling/impute/missing_indicator.py +19 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +19 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +19 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +19 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +19 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +19 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +19 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +19 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +19 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +19 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +19 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +19 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/lars.py +19 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +19 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +19 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +19 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +19 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +19 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/perceptron.py +19 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ridge.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +19 -3
- snowflake/ml/modeling/manifold/isomap.py +19 -3
- snowflake/ml/modeling/manifold/mds.py +19 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +19 -3
- snowflake/ml/modeling/manifold/tsne.py +19 -3
- snowflake/ml/modeling/metrics/classification.py +5 -6
- snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
- snowflake/ml/modeling/metrics/ranking.py +7 -3
- snowflake/ml/modeling/metrics/regression.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +19 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +19 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +3 -13
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +3 -13
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +19 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +19 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +19 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +19 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +19 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +19 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +19 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +19 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +19 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +19 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +19 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +19 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +19 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +19 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +19 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +19 -3
- snowflake/ml/modeling/preprocessing/polynomial_features.py +19 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +19 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +19 -3
- snowflake/ml/modeling/svm/linear_svc.py +19 -3
- snowflake/ml/modeling/svm/linear_svr.py +19 -3
- snowflake/ml/modeling/svm/nu_svc.py +19 -3
- snowflake/ml/modeling/svm/nu_svr.py +19 -3
- snowflake/ml/modeling/svm/svc.py +19 -3
- snowflake/ml/modeling/svm/svr.py +19 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +19 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +19 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +19 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +19 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +19 -3
- snowflake/ml/modeling/xgboost/xgb_regressor.py +19 -3
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +19 -3
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +19 -3
- snowflake/ml/registry/registry.py +2 -0
- snowflake/ml/version.py +1 -1
- snowflake_ml_python-1.2.2.dist-info/LICENSE.txt +202 -0
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/METADATA +276 -50
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/RECORD +204 -197
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/WHEEL +2 -1
- snowflake_ml_python-1.2.2.dist-info/top_level.txt +1 -0
- /snowflake/ml/modeling/_internal/{pandas_trainer.py → local_implementations/pandas_trainer.py} +0 -0
- /snowflake/ml/modeling/_internal/{snowpark_trainer.py → snowpark_implementations/snowpark_trainer.py} +0 -0
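The per-file hunks that follow all carry the same two edits: the handler imports move into the relocated snowpark_implementations package, and a sixteen-line fallback for inferring the transform output type is inserted ahead of the _batch_inference call. As a minimal sketch of what the module move means for an importer; the new dotted path and the HandlersImpl alias are copied verbatim from the hunks below, everything else is illustrative:

# 1.2.0 path, removed by the rename listed above:
# from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl

# 1.2.2 path, as imported by the regenerated estimators:
from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import (
    SnowparkHandlers as HandlersImpl,
)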
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -246,7 +246,7 @@ class MiniBatchSparsePCA(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MiniBatchSparsePCA.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -606,6 +606,22 @@ class MiniBatchSparsePCA(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
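The sixteen added lines in the last hunk above repeat verbatim in every estimator diff below; only the class name and hunk offsets differ. As a standalone sketch of the decision rule, assuming invented stand-ins (FakeDecomposer mimics a fitted sklearn object, and plain strings stand in for the Snowpark types that _infer_signature and convert_sp_to_sf_type produce in the real code):

class FakeDecomposer:
    # Mimics a decomposition transformer such as PCA(n_components=3).
    n_components = 3


def infer_expected_dtype(sklearn_object, output_cols, input_types):
    expected_dtype = ""  # the factory assigned no type
    # A cluster count that disagrees with the output column count forces ARRAY.
    if hasattr(sklearn_object, "n_clusters") and sklearn_object.n_clusters != len(output_cols):
        expected_dtype = "ARRAY"
    # Likewise for a component count that disagrees with the output column count.
    elif hasattr(sklearn_object, "n_components") and sklearn_object.n_components != len(output_cols):
        expected_dtype = "ARRAY"
    # Otherwise reuse the input type, but only when every input shares one type
    # and the input and output column counts match one-to-one.
    elif all(t == input_types[0] for t in input_types) and len(input_types) == len(output_cols):
        expected_dtype = input_types[0]
    return expected_dtype


# Column counts line up and the inputs share one type: the type is reused.
print(infer_expected_dtype(FakeDecomposer(), ["C1", "C2", "C3"], ["DOUBLE"] * 3))  # DOUBLE
# Output columns disagree with n_components: fall back to ARRAY.
print(infer_expected_dtype(FakeDecomposer(), ["C1", "C2"], ["DOUBLE"] * 2))  # ARRAY

In the shipped code this block runs immediately before the _batch_inference call shown in the hunk.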
snowflake/ml/modeling/decomposition/pca.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -248,7 +248,7 @@ class PCA(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=PCA.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -608,6 +608,22 @@ class PCA(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/decomposition/sparse_pca.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -221,7 +221,7 @@ class SparsePCA(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SparsePCA.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -581,6 +581,22 @@ class SparsePCA(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/decomposition/truncated_svd.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -202,7 +202,7 @@ class TruncatedSVD(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=TruncatedSVD.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -562,6 +562,22 @@ class TruncatedSVD(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -219,7 +219,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LinearDiscriminantAnalysis.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -581,6 +581,22 @@ class LinearDiscriminantAnalysis(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -181,7 +181,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=QuadraticDiscriminantAnalysis.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -541,6 +541,22 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/ensemble/ada_boost_classifier.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -206,7 +206,7 @@ class AdaBoostClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=AdaBoostClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -566,6 +566,22 @@ class AdaBoostClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/ensemble/ada_boost_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -203,7 +203,7 @@ class AdaBoostRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=AdaBoostRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -563,6 +563,22 @@ class AdaBoostRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/ensemble/bagging_classifier.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -238,7 +238,7 @@ class BaggingClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=BaggingClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -598,6 +598,22 @@ class BaggingClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/ensemble/bagging_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -238,7 +238,7 @@ class BaggingRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=BaggingRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -598,6 +598,22 @@ class BaggingRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/ensemble/extra_trees_classifier.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -341,7 +341,7 @@ class ExtraTreesClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=ExtraTreesClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -701,6 +701,22 @@ class ExtraTreesClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/ensemble/extra_trees_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -320,7 +320,7 @@ class ExtraTreesRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=ExtraTreesRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -680,6 +680,22 @@ class ExtraTreesRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",