snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the contents of publicly released package versions as they appear in a supported public registry, and is provided for informational purposes only.
- snowflake/ml/_internal/env_utils.py +16 -13
- snowflake/ml/_internal/exceptions/modeling_error_messages.py +5 -1
- snowflake/ml/_internal/telemetry.py +19 -0
- snowflake/ml/feature_store/__init__.py +9 -0
- snowflake/ml/feature_store/entity.py +73 -0
- snowflake/ml/feature_store/feature_store.py +1657 -0
- snowflake/ml/feature_store/feature_view.py +459 -0
- snowflake/ml/model/_client/ops/model_ops.py +16 -38
- snowflake/ml/model/_client/sql/model.py +1 -7
- snowflake/ml/model/_client/sql/model_version.py +20 -15
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +9 -1
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +12 -2
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +7 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
- snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
- snowflake/ml/model/model_signature.py +72 -16
- snowflake/ml/model/type_hints.py +12 -0
- snowflake/ml/modeling/_internal/estimator_protocols.py +1 -41
- snowflake/ml/modeling/_internal/model_trainer_builder.py +13 -9
- snowflake/ml/modeling/_internal/{distributed_hpo_trainer.py → snowpark_implementations/distributed_hpo_trainer.py} +66 -96
- snowflake/ml/modeling/_internal/{snowpark_handlers.py → snowpark_implementations/snowpark_handlers.py} +9 -6
- snowflake/ml/modeling/_internal/{xgboost_external_memory_trainer.py → snowpark_implementations/xgboost_external_memory_trainer.py} +3 -1
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +19 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +19 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +19 -3
- snowflake/ml/modeling/cluster/birch.py +19 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +19 -3
- snowflake/ml/modeling/cluster/dbscan.py +19 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +19 -3
- snowflake/ml/modeling/cluster/k_means.py +19 -3
- snowflake/ml/modeling/cluster/mean_shift.py +19 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +19 -3
- snowflake/ml/modeling/cluster/optics.py +19 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +19 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +19 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +19 -3
- snowflake/ml/modeling/compose/column_transformer.py +19 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +19 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +19 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +19 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +19 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +19 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +19 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +19 -3
- snowflake/ml/modeling/covariance/oas.py +19 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +19 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +19 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +19 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +19 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +19 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +19 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +19 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +19 -3
- snowflake/ml/modeling/decomposition/pca.py +19 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +19 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +19 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +19 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +19 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +19 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +19 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +19 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +19 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +19 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +19 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +19 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +19 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +19 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +19 -3
- snowflake/ml/modeling/impute/knn_imputer.py +19 -3
- snowflake/ml/modeling/impute/missing_indicator.py +19 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +19 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +19 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +19 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +19 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +19 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +19 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +19 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +19 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +19 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +19 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +19 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/lars.py +19 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +19 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +19 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +19 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +19 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +19 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/perceptron.py +19 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ridge.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +19 -3
- snowflake/ml/modeling/manifold/isomap.py +19 -3
- snowflake/ml/modeling/manifold/mds.py +19 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +19 -3
- snowflake/ml/modeling/manifold/tsne.py +19 -3
- snowflake/ml/modeling/metrics/classification.py +5 -6
- snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
- snowflake/ml/modeling/metrics/ranking.py +7 -3
- snowflake/ml/modeling/metrics/regression.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +19 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +19 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +3 -13
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +3 -13
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +19 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +19 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +19 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +19 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +19 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +19 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +19 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +19 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +19 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +19 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +19 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +19 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +19 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +19 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +19 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +19 -3
- snowflake/ml/modeling/preprocessing/polynomial_features.py +19 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +19 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +19 -3
- snowflake/ml/modeling/svm/linear_svc.py +19 -3
- snowflake/ml/modeling/svm/linear_svr.py +19 -3
- snowflake/ml/modeling/svm/nu_svc.py +19 -3
- snowflake/ml/modeling/svm/nu_svr.py +19 -3
- snowflake/ml/modeling/svm/svc.py +19 -3
- snowflake/ml/modeling/svm/svr.py +19 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +19 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +19 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +19 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +19 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +19 -3
- snowflake/ml/modeling/xgboost/xgb_regressor.py +19 -3
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +19 -3
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +19 -3
- snowflake/ml/registry/registry.py +2 -0
- snowflake/ml/version.py +1 -1
- snowflake_ml_python-1.2.2.dist-info/LICENSE.txt +202 -0
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/METADATA +276 -50
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/RECORD +204 -197
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/WHEEL +2 -1
- snowflake_ml_python-1.2.2.dist-info/top_level.txt +1 -0
- /snowflake/ml/modeling/_internal/{pandas_trainer.py → local_implementations/pandas_trainer.py} +0 -0
- /snowflake/ml/modeling/_internal/{snowpark_trainer.py → snowpark_implementations/snowpark_trainer.py} +0 -0

snowflake/ml/modeling/cluster/feature_agglomeration.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -244,7 +244,7 @@ class FeatureAgglomeration(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=FeatureAgglomeration.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -604,6 +604,22 @@ class FeatureAgglomeration(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
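
The sixteen-line hunk above is stamped verbatim into every autogenerated estimator in this release; the same change repeats in each file diff below. Reduced to a standalone function, the inference rule looks roughly like the following sketch. The function name, the plain-string type names, and the fake estimator are illustrative stand-ins: the packaged code works on Snowpark types via `_infer_signature` and `convert_sp_to_sf_type` inside the generated `transform` method.

```python
from typing import List, Optional


def infer_expected_dtype(sklearn_object: object, output_cols: List[str], input_types: List[str]) -> Optional[str]:
    """Return a Snowflake type name, or None when no single concrete type can be inferred."""
    # Clustering transformers: when the output column count differs from n_clusters,
    # each row carries a list of values, so the result column must be an ARRAY.
    if hasattr(sklearn_object, "n_clusters") and getattr(sklearn_object, "n_clusters") != len(output_cols):
        return "ARRAY"
    # Decomposition transformers: the same rule, keyed on n_components.
    if hasattr(sklearn_object, "n_components") and getattr(sklearn_object, "n_components") != len(output_cols):
        return "ARRAY"
    # Otherwise reuse the input types, but only when they are homogeneous and map
    # one-to-one onto the output columns; anything else cannot be typed up front.
    if input_types and all(t == input_types[0] for t in input_types) and len(input_types) == len(output_cols):
        return input_types[0]
    return None


class _FakeKMeans:
    """Illustrative stand-in for a fitted clustering estimator."""
    n_clusters = 8


print(infer_expected_dtype(_FakeKMeans(), ["OUTPUT_1"], ["DOUBLE", "DOUBLE"]))  # ARRAY: 8 clusters, 1 output column
print(infer_expected_dtype(object(), ["A", "B"], ["DOUBLE", "DOUBLE"]))  # DOUBLE: homogeneous, one-to-one
```

Leaving the type undecided (and ultimately falling back to a variant/ARRAY column) whenever the column counts disagree or the input types are mixed keeps the declared output signature safe at the cost of a weaker type.
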

snowflake/ml/modeling/cluster/k_means.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -239,7 +239,7 @@ class KMeans(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=KMeans.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -601,6 +601,22 @@ class KMeans(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
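
The other change repeated across these files, `self._handlers: TransformerHandlers = HandlersImpl(...)`, pairs with the +1/-41 rewrite of estimator_protocols.py: the generated classes now annotate their handler attribute with a single structural protocol and bind the concrete Snowpark implementation at construction time. A hedged sketch of the pattern follows; the method name and signature on the protocol are illustrative assumptions, not the package's actual protocol surface.

```python
from typing import Any, List, Protocol


class TransformerHandlers(Protocol):
    """Stand-in for the protocol in estimator_protocols.py (shape assumed, not copied)."""

    def batch_inference(
        self,
        dataset: Any,
        inference_method: str,
        input_cols: List[str],
        expected_output_cols: List[str],
    ) -> Any:
        ...


class SnowparkHandlers:
    """Concrete handler; satisfies TransformerHandlers structurally, without inheriting from it."""

    def __init__(self, class_name: str, subproject: str, autogenerated: bool = False) -> None:
        self._class_name = class_name
        self._subproject = subproject
        self._autogenerated = autogenerated

    def batch_inference(self, dataset: Any, inference_method: str, input_cols: List[str], expected_output_cols: List[str]) -> Any:
        # The packaged version pushes this down to Snowpark; elided here.
        raise NotImplementedError


# The annotation is the interface; the assignment picks the implementation.
# "MODELING" is an illustrative subproject value, not the package's constant.
handlers: TransformerHandlers = SnowparkHandlers(class_name="KMeans", subproject="MODELING")
```

Because `Protocol` typing is structural, swapping in a local pandas handler only requires matching the method shapes, which is what lets the split into `snowpark_implementations/` and `local_implementations/` (see the renames in the file list above) happen without touching the generated annotations.
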

snowflake/ml/modeling/cluster/mean_shift.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -215,7 +215,7 @@ class MeanShift(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MeanShift.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -575,6 +575,22 @@ class MeanShift(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/cluster/mini_batch_k_means.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -265,7 +265,7 @@ class MiniBatchKMeans(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MiniBatchKMeans.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -627,6 +627,22 @@ class MiniBatchKMeans(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/cluster/optics.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -285,7 +285,7 @@ class OPTICS(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=OPTICS.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -643,6 +643,22 @@ class OPTICS(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/cluster/spectral_biclustering.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -223,7 +223,7 @@ class SpectralBiclustering(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SpectralBiclustering.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -581,6 +581,22 @@ class SpectralBiclustering(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/cluster/spectral_clustering.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -281,7 +281,7 @@ class SpectralClustering(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SpectralClustering.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -639,6 +639,22 @@ class SpectralClustering(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/cluster/spectral_coclustering.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -202,7 +202,7 @@ class SpectralCoclustering(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=SpectralCoclustering.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -560,6 +560,22 @@ class SpectralCoclustering(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/compose/column_transformer.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -232,7 +232,7 @@ class ColumnTransformer(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=ColumnTransformer.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -592,6 +592,22 @@ class ColumnTransformer(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/compose/transformed_target_regressor.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -193,7 +193,7 @@ class TransformedTargetRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=TransformedTargetRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -553,6 +553,22 @@ class TransformedTargetRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/covariance/elliptic_envelope.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -188,7 +188,7 @@ class EllipticEnvelope(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=EllipticEnvelope.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -548,6 +548,22 @@ class EllipticEnvelope(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",

snowflake/ml/modeling/covariance/empirical_covariance.py

@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -164,7 +164,7 @@ class EmpiricalCovariance(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers:
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=EmpiricalCovariance.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -522,6 +522,22 @@ class EmpiricalCovariance(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"

+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",