snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- snowflake/ml/_internal/env_utils.py +16 -13
- snowflake/ml/_internal/exceptions/modeling_error_messages.py +5 -1
- snowflake/ml/_internal/telemetry.py +19 -0
- snowflake/ml/feature_store/__init__.py +9 -0
- snowflake/ml/feature_store/entity.py +73 -0
- snowflake/ml/feature_store/feature_store.py +1657 -0
- snowflake/ml/feature_store/feature_view.py +459 -0
- snowflake/ml/model/_client/ops/model_ops.py +16 -38
- snowflake/ml/model/_client/sql/model.py +1 -7
- snowflake/ml/model/_client/sql/model_version.py +20 -15
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +9 -1
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +12 -2
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +7 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
- snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
- snowflake/ml/model/model_signature.py +72 -16
- snowflake/ml/model/type_hints.py +12 -0
- snowflake/ml/modeling/_internal/estimator_protocols.py +1 -41
- snowflake/ml/modeling/_internal/model_trainer_builder.py +13 -9
- snowflake/ml/modeling/_internal/{distributed_hpo_trainer.py → snowpark_implementations/distributed_hpo_trainer.py} +66 -96
- snowflake/ml/modeling/_internal/{snowpark_handlers.py → snowpark_implementations/snowpark_handlers.py} +9 -6
- snowflake/ml/modeling/_internal/{xgboost_external_memory_trainer.py → snowpark_implementations/xgboost_external_memory_trainer.py} +3 -1
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +19 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +19 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +19 -3
- snowflake/ml/modeling/cluster/birch.py +19 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +19 -3
- snowflake/ml/modeling/cluster/dbscan.py +19 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +19 -3
- snowflake/ml/modeling/cluster/k_means.py +19 -3
- snowflake/ml/modeling/cluster/mean_shift.py +19 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +19 -3
- snowflake/ml/modeling/cluster/optics.py +19 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +19 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +19 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +19 -3
- snowflake/ml/modeling/compose/column_transformer.py +19 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +19 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +19 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +19 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +19 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +19 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +19 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +19 -3
- snowflake/ml/modeling/covariance/oas.py +19 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +19 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +19 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +19 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +19 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +19 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +19 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +19 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +19 -3
- snowflake/ml/modeling/decomposition/pca.py +19 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +19 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +19 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +19 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +19 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +19 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +19 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +19 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +19 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +19 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +19 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +19 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +19 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +19 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +19 -3
- snowflake/ml/modeling/impute/knn_imputer.py +19 -3
- snowflake/ml/modeling/impute/missing_indicator.py +19 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +19 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +19 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +19 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +19 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +19 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +19 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +19 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +19 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +19 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +19 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +19 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/lars.py +19 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +19 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +19 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +19 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +19 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +19 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/perceptron.py +19 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ridge.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +19 -3
- snowflake/ml/modeling/manifold/isomap.py +19 -3
- snowflake/ml/modeling/manifold/mds.py +19 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +19 -3
- snowflake/ml/modeling/manifold/tsne.py +19 -3
- snowflake/ml/modeling/metrics/classification.py +5 -6
- snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
- snowflake/ml/modeling/metrics/ranking.py +7 -3
- snowflake/ml/modeling/metrics/regression.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +19 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +19 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +3 -13
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +3 -13
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +19 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +19 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +19 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +19 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +19 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +19 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +19 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +19 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +19 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +19 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +19 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +19 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +19 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +19 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +19 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +19 -3
- snowflake/ml/modeling/preprocessing/polynomial_features.py +19 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +19 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +19 -3
- snowflake/ml/modeling/svm/linear_svc.py +19 -3
- snowflake/ml/modeling/svm/linear_svr.py +19 -3
- snowflake/ml/modeling/svm/nu_svc.py +19 -3
- snowflake/ml/modeling/svm/nu_svr.py +19 -3
- snowflake/ml/modeling/svm/svc.py +19 -3
- snowflake/ml/modeling/svm/svr.py +19 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +19 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +19 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +19 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +19 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +19 -3
- snowflake/ml/modeling/xgboost/xgb_regressor.py +19 -3
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +19 -3
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +19 -3
- snowflake/ml/registry/registry.py +2 -0
- snowflake/ml/version.py +1 -1
- snowflake_ml_python-1.2.2.dist-info/LICENSE.txt +202 -0
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/METADATA +276 -50
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/RECORD +204 -197
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/WHEEL +2 -1
- snowflake_ml_python-1.2.2.dist-info/top_level.txt +1 -0
- /snowflake/ml/modeling/_internal/{pandas_trainer.py → local_implementations/pandas_trainer.py} +0 -0
- /snowflake/ml/modeling/_internal/{snowpark_trainer.py → snowpark_implementations/snowpark_trainer.py} +0 -0
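The hunks below repeat one pattern across the autogenerated `linear_model` estimators: the `snowpark_handlers` import moves into the new `snowpark_implementations` subpackage, `self._handlers` is annotated with the `TransformerHandlers` protocol, and `transform` gains a fallback that infers the expected output dtype when the code-generation factory could not assign one. The following minimal sketch restates that fallback as a standalone function for readability; `estimator`, `output_cols`, and `input_types` are illustrative stand-ins for the attributes the real method uses (`self._sklearn_object`, `self.output_cols`, and the Snowpark types returned by `_infer_signature`), not part of the package's API.

```python
from typing import Any, Sequence


def infer_expected_dtype(estimator: Any, output_cols: Sequence[str], input_types: Sequence[Any]) -> str:
    """Sketch of the 1.2.2 fallback: choose an output column type when none was assigned."""
    # A clustering or decomposition transformer whose output width differs from
    # n_clusters / n_components packs each row into a list, i.e. an ARRAY column.
    if hasattr(estimator, "n_clusters") and estimator.n_clusters != len(output_cols):
        return "ARRAY"
    if hasattr(estimator, "n_components") and estimator.n_components != len(output_cols):
        return "ARRAY"
    # Otherwise the output type can be copied from the inputs, but only when all
    # input types agree and the transform preserves the column count; the real
    # code then maps the Snowpark type to a Snowflake type via convert_sp_to_sf_type.
    if input_types and all(t == input_types[0] for t in input_types) and len(input_types) == len(output_cols):
        return str(input_types[0])
    # Undetermined: the real method leaves expected_dtype as "" and falls back to variant.
    return ""


# Illustrative use with a fake clustering object: 8 clusters but 1 output column -> "ARRAY".
class FakeKMeans:
    n_clusters = 8


print(infer_expected_dtype(FakeKMeans(), ["CLUSTER"], ["LongType()"]))  # ARRAY
```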
--- a/snowflake/ml/modeling/linear_model/huber_regressor.py
+++ b/snowflake/ml/modeling/linear_model/huber_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -194,7 +194,7 @@ class HuberRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=HuberRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -554,6 +554,22 @@ class HuberRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/lars.py
+++ b/snowflake/ml/modeling/linear_model/lars.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -223,7 +223,7 @@ class Lars(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=Lars.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -583,6 +583,22 @@ class Lars(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/lars_cv.py
+++ b/snowflake/ml/modeling/linear_model/lars_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -231,7 +231,7 @@ class LarsCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LarsCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -591,6 +591,22 @@ class LarsCV(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/lasso.py
+++ b/snowflake/ml/modeling/linear_model/lasso.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -224,7 +224,7 @@ class Lasso(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=Lasso.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -584,6 +584,22 @@ class Lasso(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/lasso_cv.py
+++ b/snowflake/ml/modeling/linear_model/lasso_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -252,7 +252,7 @@ class LassoCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LassoCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -612,6 +612,22 @@ class LassoCV(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/lasso_lars.py
+++ b/snowflake/ml/modeling/linear_model/lasso_lars.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -244,7 +244,7 @@ class LassoLars(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LassoLars.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -604,6 +604,22 @@ class LassoLars(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/lasso_lars_cv.py
+++ b/snowflake/ml/modeling/linear_model/lasso_lars_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -245,7 +245,7 @@ class LassoLarsCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LassoLarsCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -605,6 +605,22 @@ class LassoLarsCV(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/lasso_lars_ic.py
+++ b/snowflake/ml/modeling/linear_model/lasso_lars_ic.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -228,7 +228,7 @@ class LassoLarsIC(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LassoLarsIC.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -588,6 +588,22 @@ class LassoLarsIC(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/linear_regression.py
+++ b/snowflake/ml/modeling/linear_model/linear_regression.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -181,7 +181,7 @@ class LinearRegression(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LinearRegression.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -541,6 +541,22 @@ class LinearRegression(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/logistic_regression.py
+++ b/snowflake/ml/modeling/linear_model/logistic_regression.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -295,7 +295,7 @@ class LogisticRegression(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LogisticRegression.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -655,6 +655,22 @@ class LogisticRegression(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/logistic_regression_cv.py
+++ b/snowflake/ml/modeling/linear_model/logistic_regression_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -316,7 +316,7 @@ class LogisticRegressionCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LogisticRegressionCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -676,6 +676,22 @@ class LogisticRegressionCV(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/multi_task_elastic_net.py
+++ b/snowflake/ml/modeling/linear_model/multi_task_elastic_net.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -214,7 +214,7 @@ class MultiTaskElasticNet(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MultiTaskElasticNet.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -574,6 +574,22 @@ class MultiTaskElasticNet(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
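One practical note, hedged because these are private modules rather than a documented API: any code that imported the old internal path directly will break on 1.2.2 and needs the new `snowpark_implementations` location shown in the hunks above. A minimal sketch, assuming snowflake-ml-python 1.2.2 is installed:

```python
# snowflake-ml-python 1.2.0 (module removed in 1.2.2):
# from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers

# snowflake-ml-python 1.2.2 (new location per the diffs above):
from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import (
    SnowparkHandlers,
)
```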