snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- snowflake/ml/_internal/env_utils.py +16 -13
- snowflake/ml/_internal/exceptions/modeling_error_messages.py +5 -1
- snowflake/ml/_internal/telemetry.py +19 -0
- snowflake/ml/feature_store/__init__.py +9 -0
- snowflake/ml/feature_store/entity.py +73 -0
- snowflake/ml/feature_store/feature_store.py +1657 -0
- snowflake/ml/feature_store/feature_view.py +459 -0
- snowflake/ml/model/_client/ops/model_ops.py +16 -38
- snowflake/ml/model/_client/sql/model.py +1 -7
- snowflake/ml/model/_client/sql/model_version.py +20 -15
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +9 -1
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +12 -2
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +7 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
- snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
- snowflake/ml/model/model_signature.py +72 -16
- snowflake/ml/model/type_hints.py +12 -0
- snowflake/ml/modeling/_internal/estimator_protocols.py +1 -41
- snowflake/ml/modeling/_internal/model_trainer_builder.py +13 -9
- snowflake/ml/modeling/_internal/{distributed_hpo_trainer.py → snowpark_implementations/distributed_hpo_trainer.py} +66 -96
- snowflake/ml/modeling/_internal/{snowpark_handlers.py → snowpark_implementations/snowpark_handlers.py} +9 -6
- snowflake/ml/modeling/_internal/{xgboost_external_memory_trainer.py → snowpark_implementations/xgboost_external_memory_trainer.py} +3 -1
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +19 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +19 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +19 -3
- snowflake/ml/modeling/cluster/birch.py +19 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +19 -3
- snowflake/ml/modeling/cluster/dbscan.py +19 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +19 -3
- snowflake/ml/modeling/cluster/k_means.py +19 -3
- snowflake/ml/modeling/cluster/mean_shift.py +19 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +19 -3
- snowflake/ml/modeling/cluster/optics.py +19 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +19 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +19 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +19 -3
- snowflake/ml/modeling/compose/column_transformer.py +19 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +19 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +19 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +19 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +19 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +19 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +19 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +19 -3
- snowflake/ml/modeling/covariance/oas.py +19 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +19 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +19 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +19 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +19 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +19 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +19 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +19 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +19 -3
- snowflake/ml/modeling/decomposition/pca.py +19 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +19 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +19 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +19 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +19 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +19 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +19 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +19 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +19 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +19 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +19 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +19 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +19 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +19 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +19 -3
- snowflake/ml/modeling/impute/knn_imputer.py +19 -3
- snowflake/ml/modeling/impute/missing_indicator.py +19 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +19 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +19 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +19 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +19 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +19 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +19 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +19 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +19 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +19 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +19 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +19 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/lars.py +19 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +19 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +19 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +19 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +19 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +19 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/perceptron.py +19 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ridge.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +19 -3
- snowflake/ml/modeling/manifold/isomap.py +19 -3
- snowflake/ml/modeling/manifold/mds.py +19 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +19 -3
- snowflake/ml/modeling/manifold/tsne.py +19 -3
- snowflake/ml/modeling/metrics/classification.py +5 -6
- snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
- snowflake/ml/modeling/metrics/ranking.py +7 -3
- snowflake/ml/modeling/metrics/regression.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +19 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +19 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +3 -13
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +3 -13
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +19 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +19 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +19 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +19 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +19 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +19 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +19 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +19 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +19 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +19 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +19 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +19 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +19 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +19 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +19 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +19 -3
- snowflake/ml/modeling/preprocessing/polynomial_features.py +19 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +19 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +19 -3
- snowflake/ml/modeling/svm/linear_svc.py +19 -3
- snowflake/ml/modeling/svm/linear_svr.py +19 -3
- snowflake/ml/modeling/svm/nu_svc.py +19 -3
- snowflake/ml/modeling/svm/nu_svr.py +19 -3
- snowflake/ml/modeling/svm/svc.py +19 -3
- snowflake/ml/modeling/svm/svr.py +19 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +19 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +19 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +19 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +19 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +19 -3
- snowflake/ml/modeling/xgboost/xgb_regressor.py +19 -3
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +19 -3
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +19 -3
- snowflake/ml/registry/registry.py +2 -0
- snowflake/ml/version.py +1 -1
- snowflake_ml_python-1.2.2.dist-info/LICENSE.txt +202 -0
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/METADATA +276 -50
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/RECORD +204 -197
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/WHEEL +2 -1
- snowflake_ml_python-1.2.2.dist-info/top_level.txt +1 -0
- /snowflake/ml/modeling/_internal/{pandas_trainer.py → local_implementations/pandas_trainer.py} +0 -0
- /snowflake/ml/modeling/_internal/{snowpark_trainer.py → snowpark_implementations/snowpark_trainer.py} +0 -0
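Two changes repeat across every autogenerated estimator shown below: the handler implementation moved from `snowflake.ml.modeling._internal.snowpark_handlers` into the new `snowpark_implementations` subpackage, and a fallback for inferring the transform output type was added. As a hedged illustration only, here is a hypothetical compatibility shim for code that (against best practice) imported the old private path; both paths are taken from the diff, the shim itself is not part of the package:

```python
# Hypothetical shim: prefer the 1.2.2+ private path, fall back to the pre-1.2.2 one.
# The `_internal` package carries no stability guarantee, so this is illustrative only.
try:
    from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import (
        SnowparkHandlers,
    )
except ImportError:
    from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers
```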
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -255,7 +255,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MultiTaskElasticNetCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
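On the 1.2.2 side, the autogenerated `_handlers` attribute gains a `TransformerHandlers` protocol annotation (imported in the hunk above). A minimal sketch of how such a protocol-typed slot works; the protocol member, the `batch_inference` name, and the `subproject` value are placeholders, only the constructor shape comes from the diff:

```python
from typing import Protocol


class TransformerHandlers(Protocol):
    """Illustrative structural type; not the real estimator_protocols definition."""

    def batch_inference(self, inference_method: str) -> object: ...


class HandlersImpl:
    """Stand-in for SnowparkHandlers with the constructor shape seen in the diff."""

    def __init__(self, class_name: str, subproject: str, autogenerated: bool = False) -> None:
        self.class_name = class_name
        self.subproject = subproject
        self.autogenerated = autogenerated

    def batch_inference(self, inference_method: str) -> object:
        return {"method": inference_method, "class": self.class_name}


# The annotation accepts any structurally matching implementation:
handlers: TransformerHandlers = HandlersImpl(
    class_name="MultiTaskElasticNetCV", subproject="SnowML", autogenerated=True
)
```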
@@ -615,6 +615,22 @@ class MultiTaskElasticNetCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
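This 16-line fallback is repeated verbatim in every estimator below. A self-contained sketch of the same decision ladder, using plain strings in place of Snowpark type objects (`_infer_signature` and `convert_sp_to_sf_type` are the library helpers shown in the hunk; the function name and toy inputs here are illustrative):

```python
from typing import Sequence


def resolve_expected_dtype(
    sklearn_obj: object,
    input_types: Sequence[str],
    n_output_cols: int,
    expected_dtype: str = "",
) -> str:
    """Mirrors the fallback ladder added in 1.2.2; illustrative, not the library API."""
    if expected_dtype:
        # The factory already assigned a type; nothing to infer.
        return expected_dtype
    if hasattr(sklearn_obj, "n_clusters") and sklearn_obj.n_clusters != n_output_cols:
        # Clustering transformer whose output column count differs from n_clusters.
        return "ARRAY"
    if hasattr(sklearn_obj, "n_components") and sklearn_obj.n_components != n_output_cols:
        # Decomposition transformer whose output column count differs from n_components.
        return "ARRAY"
    if input_types and all(t == input_types[0] for t in input_types) and len(input_types) == n_output_cols:
        # All inputs share one type and columns map one-to-one: propagate it
        # (stands in for convert_sp_to_sf_type on a Snowpark type).
        return input_types[0]
    return expected_dtype  # still "" -- the caller keeps its default handling


class FakeKMeans:
    n_clusters = 8


print(resolve_expected_dtype(FakeKMeans(), ["FLOAT", "FLOAT"], n_output_cols=1))  # -> ARRAY
print(resolve_expected_dtype(object(), ["FLOAT", "FLOAT"], n_output_cols=2))      # -> FLOAT
```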
snowflake/ml/modeling/linear_model/multi_task_lasso.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -206,7 +206,7 @@ class MultiTaskLasso(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MultiTaskLasso.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -566,6 +566,22 @@ class MultiTaskLasso(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -241,7 +241,7 @@ class MultiTaskLassoCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MultiTaskLassoCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -601,6 +601,22 @@ class MultiTaskLassoCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -189,7 +189,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=OrthogonalMatchingPursuit.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -549,6 +549,22 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -263,7 +263,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=PassiveAggressiveClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -623,6 +623,22 @@ class PassiveAggressiveClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -249,7 +249,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=PassiveAggressiveRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -609,6 +609,22 @@ class PassiveAggressiveRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/perceptron.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -262,7 +262,7 @@ class Perceptron(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=Perceptron.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -622,6 +622,22 @@ class Perceptron(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/poisson_regressor.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -211,7 +211,7 @@ class PoissonRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=PoissonRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -571,6 +571,22 @@ class PoissonRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/ransac_regressor.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -267,7 +267,7 @@ class RANSACRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=RANSACRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -627,6 +627,22 @@ class RANSACRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/ridge.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -259,7 +259,7 @@ class Ridge(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=Ridge.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -619,6 +619,22 @@ class Ridge(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/ridge_classifier.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -259,7 +259,7 @@ class RidgeClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=RidgeClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -619,6 +619,22 @@ class RidgeClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -210,7 +210,7 @@ class RidgeClassifierCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=RidgeClassifierCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -570,6 +570,22 @@ class RidgeClassifierCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",