snowflake-ml-python 1.1.2__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/{model/_deploy_client/utils → _internal/container_services/image_registry}/imagelib.py +3 -1
- snowflake/ml/{model/_deploy_client/utils/image_registry_client.py → _internal/container_services/image_registry/registry_client.py} +4 -2
- snowflake/ml/_internal/env_utils.py +31 -52
- snowflake/ml/_internal/file_utils.py +17 -0
- snowflake/ml/_internal/telemetry.py +19 -0
- snowflake/ml/_internal/utils/query_result_checker.py +8 -5
- snowflake/ml/_internal/utils/snowflake_env.py +95 -0
- snowflake/ml/fileset/parquet_parser.py +31 -1
- snowflake/ml/model/__init__.py +6 -0
- snowflake/ml/model/_client/model/model_impl.py +172 -13
- snowflake/ml/model/_client/model/model_version_impl.py +96 -52
- snowflake/ml/model/_client/ops/metadata_ops.py +1 -3
- snowflake/ml/model/_client/ops/model_ops.py +155 -9
- snowflake/ml/model/_client/sql/model.py +55 -10
- snowflake/ml/model/_client/sql/model_version.py +72 -61
- snowflake/ml/model/_client/sql/stage.py +10 -4
- snowflake/ml/model/_client/sql/tag.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +2 -2
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +8 -8
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +4 -6
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +6 -7
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +4 -5
- snowflake/ml/model/_deploy_client/snowservice/instance_types.py +9 -1
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +20 -11
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +45 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +30 -0
- snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -1
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +10 -7
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +1 -1
- snowflake/ml/model/_packager/model_handlers/xgboost.py +13 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
- snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
- snowflake/ml/model/_signatures/core.py +20 -17
- snowflake/ml/model/custom_model.py +30 -27
- snowflake/ml/model/model_signature.py +16 -17
- snowflake/ml/model/type_hints.py +3 -0
- snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +185 -98
- snowflake/ml/modeling/_internal/estimator_utils.py +21 -0
- snowflake/ml/modeling/_internal/model_specifications.py +3 -10
- snowflake/ml/modeling/_internal/model_trainer_builder.py +55 -11
- snowflake/ml/modeling/_internal/snowpark_handlers.py +9 -6
- snowflake/ml/modeling/_internal/snowpark_trainer.py +10 -2
- snowflake/ml/modeling/_internal/xgboost_external_memory_trainer.py +444 -0
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -16
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -16
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -16
- snowflake/ml/modeling/cluster/birch.py +51 -16
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -16
- snowflake/ml/modeling/cluster/dbscan.py +51 -16
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -16
- snowflake/ml/modeling/cluster/k_means.py +51 -16
- snowflake/ml/modeling/cluster/mean_shift.py +51 -16
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -16
- snowflake/ml/modeling/cluster/optics.py +51 -16
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -16
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -16
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -16
- snowflake/ml/modeling/compose/column_transformer.py +51 -16
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -16
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -16
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -16
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -16
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -16
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -16
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -16
- snowflake/ml/modeling/covariance/oas.py +51 -16
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -16
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -16
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -16
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -16
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -16
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -16
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -16
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -16
- snowflake/ml/modeling/decomposition/pca.py +51 -16
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -16
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -16
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -16
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -16
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -16
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -16
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -16
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -16
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -16
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -16
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -16
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -16
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -16
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -16
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -16
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -16
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -16
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -16
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -16
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -16
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -16
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -16
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -16
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -16
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -16
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -16
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -16
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -16
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -16
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -16
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -16
- snowflake/ml/modeling/impute/knn_imputer.py +51 -16
- snowflake/ml/modeling/impute/missing_indicator.py +51 -16
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -16
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -16
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -16
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -16
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -16
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -16
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -16
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -16
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -16
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -16
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -16
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -16
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -16
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -16
- snowflake/ml/modeling/linear_model/lars.py +51 -16
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -16
- snowflake/ml/modeling/linear_model/lasso.py +51 -16
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -16
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -16
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -16
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -16
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -16
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -16
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -16
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -16
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -16
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -16
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -16
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -16
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -16
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -16
- snowflake/ml/modeling/linear_model/perceptron.py +51 -16
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -16
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -16
- snowflake/ml/modeling/linear_model/ridge.py +51 -16
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -16
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -16
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -16
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -16
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -16
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -16
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -16
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -16
- snowflake/ml/modeling/manifold/isomap.py +51 -16
- snowflake/ml/modeling/manifold/mds.py +51 -16
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -16
- snowflake/ml/modeling/manifold/tsne.py +51 -16
- snowflake/ml/modeling/metrics/classification.py +5 -6
- snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
- snowflake/ml/modeling/metrics/ranking.py +7 -3
- snowflake/ml/modeling/metrics/regression.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -16
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -16
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -16
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -16
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -16
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -16
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -16
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -16
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -16
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -16
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -16
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -16
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -16
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -16
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -16
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -16
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -16
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -16
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -16
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -16
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -16
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -16
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +15 -1
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -16
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -16
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -16
- snowflake/ml/modeling/svm/linear_svc.py +51 -16
- snowflake/ml/modeling/svm/linear_svr.py +51 -16
- snowflake/ml/modeling/svm/nu_svc.py +51 -16
- snowflake/ml/modeling/svm/nu_svr.py +51 -16
- snowflake/ml/modeling/svm/svc.py +51 -16
- snowflake/ml/modeling/svm/svr.py +51 -16
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -16
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -16
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -16
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -16
- snowflake/ml/modeling/xgboost/xgb_classifier.py +69 -16
- snowflake/ml/modeling/xgboost/xgb_regressor.py +69 -16
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +69 -16
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +69 -16
- snowflake/ml/registry/__init__.py +3 -0
- snowflake/ml/registry/_manager/model_manager.py +163 -0
- snowflake/ml/registry/model_registry.py +12 -0
- snowflake/ml/registry/registry.py +100 -90
- snowflake/ml/version.py +1 -1
- snowflake_ml_python-1.2.1.dist-info/LICENSE.txt +202 -0
- {snowflake_ml_python-1.1.2.dist-info → snowflake_ml_python-1.2.1.dist-info}/METADATA +295 -60
- snowflake_ml_python-1.2.1.dist-info/RECORD +355 -0
- {snowflake_ml_python-1.1.2.dist-info → snowflake_ml_python-1.2.1.dist-info}/WHEEL +2 -1
- snowflake_ml_python-1.2.1.dist-info/top_level.txt +1 -0
- snowflake/ml/model/_client/model/model_method_info.py +0 -19
- snowflake_ml_python-1.1.2.dist-info/RECORD +0 -347
- /snowflake/ml/_internal/{utils/spcs_image_registry.py → container_services/image_registry/credential.py} +0 -0
- /snowflake/ml/_internal/{utils/image_registry_http_client.py → container_services/image_registry/http_client.py} +0 -0
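
The recurring +51 −16 change to the autogenerated `snowflake/ml/modeling/*` estimator classes repeats across files; the hunks below show it for `LGBMRegressor`, `ARDRegression`, `BayesianRidge`, `ElasticNet`, and `ElasticNetCV`. Per class, 1.2.1 adds module-level `_is_fit_predict_method_enabled` / `_is_fit_transform_method_enabled` gates, initializes `_use_external_memory_version` and `_batch_size` and threads them into the model-trainer call, drops the eager conda-channel package-version validation from the fit path, adds a fallback output-type inference to `transform`, and fills in `fit_predict` / `fit_transform` bodies behind `available_if`.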
snowflake/ml/modeling/lightgbm/lgbm_regressor.py:

```diff
@@ -53,6 +53,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "lightgbm".replace("sklearn.", "").split("_")])
 
 
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+
+
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
+
+
 class LGBMRegressor(BaseTransformer):
     r"""LightGBM regressor
     For more details on this class, see [lightgbm.LGBMRegressor]
@@ -144,7 +156,9 @@ class LGBMRegressor(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'lightgbm=={lightgbm.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -240,11 +254,6 @@ class LGBMRegressor(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
-
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -272,7 +281,9 @@ class LGBMRegressor(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -543,6 +554,22 @@ class LGBMRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -558,8 +585,8 @@ class LGBMRegressor(BaseTransformer):
 
         return output_df
 
-    @available_if(
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
 
 
@@ -572,13 +599,21 @@ class LGBMRegressor(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-
-
-
-
-
-
-
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+
+
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
+
 
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
```
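
The gating above relies on scikit-learn's `available_if` (from `sklearn.utils.metaestimators`, public since scikit-learn 1.0), which exposes a method only when a per-instance check passes. A minimal, self-contained sketch of that mechanism follows; the `Wrapped` / `_impl` names are illustrative, not part of the snowflake-ml API:

```python
# Sketch only: demonstrates the available_if gating used in the diff above,
# assuming scikit-learn >= 1.0. Wrapped/_impl are hypothetical names.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.utils.metaestimators import available_if


def _fit_predict_check(self):
    # available_if calls this with the instance on attribute lookup; a falsy
    # result makes the decorated method raise AttributeError, so hasattr()
    # reports False and the method is effectively absent.
    return callable(getattr(self._impl, "fit_predict", None))


class Wrapped:
    def __init__(self, impl):
        self._impl = impl

    @available_if(_fit_predict_check)
    def fit_predict(self, X):
        return self._impl.fit_predict(X)


X = np.array([[0.0], [0.1], [5.0], [5.1]])
print(hasattr(Wrapped(KMeans(n_clusters=2, n_init=10)), "fit_predict"))  # True
print(Wrapped(KMeans(n_clusters=2, n_init=10)).fit_predict(X))           # cluster labels
print(hasattr(Wrapped(object()), "fit_predict"))                         # False
```

Note that the generated checks in the diff are hardcoded to `return False and ...`, so `fit_predict` / `fit_transform` remain unavailable on these regressor classes; the decorated bodies (which return `labels_` / `embedding_`, attributes regressors lack) are reachable only for estimator types whose generated check passes.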
snowflake/ml/modeling/linear_model/ard_regression.py:

```diff
@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("sklearn.", "").split("_")])
 
 
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+
+
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
+
+
 class ARDRegression(BaseTransformer):
     r"""Bayesian ARD regression
     For more details on this class, see [sklearn.linear_model.ARDRegression]
@@ -179,7 +191,9 @@ class ARDRegression(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -266,11 +280,6 @@ class ARDRegression(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
-
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -298,7 +307,9 @@ class ARDRegression(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -569,6 +580,22 @@ class ARDRegression(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -584,8 +611,8 @@ class ARDRegression(BaseTransformer):
 
         return output_df
 
-    @available_if(
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
 
 
@@ -598,13 +625,21 @@ class ARDRegression(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-
-
-
-
-
-
-
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+
+
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
+
 
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
```
snowflake/ml/modeling/linear_model/bayesian_ridge.py:

```diff
@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("sklearn.", "").split("_")])
 
 
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+
+
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
+
+
 class BayesianRidge(BaseTransformer):
     r"""Bayesian ridge regression
     For more details on this class, see [sklearn.linear_model.BayesianRidge]
@@ -189,7 +201,9 @@ class BayesianRidge(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -277,11 +291,6 @@ class BayesianRidge(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
-
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -309,7 +318,9 @@ class BayesianRidge(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -580,6 +591,22 @@ class BayesianRidge(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -595,8 +622,8 @@ class BayesianRidge(BaseTransformer):
 
         return output_df
 
-    @available_if(
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
 
 
@@ -609,13 +636,21 @@ class BayesianRidge(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-
-
-
-
-
-
-
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+
+
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
+
 
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
```
snowflake/ml/modeling/linear_model/elastic_net.py:

```diff
@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("sklearn.", "").split("_")])
 
 
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+
+
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
+
+
 class ElasticNet(BaseTransformer):
     r"""Linear regression with combined L1 and L2 priors as regularizer
     For more details on this class, see [sklearn.linear_model.ElasticNet]
@@ -190,7 +202,9 @@ class ElasticNet(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -276,11 +290,6 @@ class ElasticNet(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
-
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -308,7 +317,9 @@ class ElasticNet(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -579,6 +590,22 @@ class ElasticNet(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -594,8 +621,8 @@ class ElasticNet(BaseTransformer):
 
         return output_df
 
-    @available_if(
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
 
 
@@ -608,13 +635,21 @@ class ElasticNet(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-
-
-
-
-
-
-
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+
+
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
+
 
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
```
snowflake/ml/modeling/linear_model/elastic_net_cv.py:

```diff
@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("sklearn.", "").split("_")])
 
 
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+
+
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
+
+
 class ElasticNetCV(BaseTransformer):
     r"""Elastic Net model with iterative fitting along a regularization path
     For more details on this class, see [sklearn.linear_model.ElasticNetCV]
@@ -222,7 +234,9 @@ class ElasticNetCV(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
        self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -312,11 +326,6 @@ class ElasticNetCV(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
-
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -344,7 +353,9 @@ class ElasticNetCV(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -615,6 +626,22 @@ class ElasticNetCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -630,8 +657,8 @@ class ElasticNetCV(BaseTransformer):
 
         return output_df
 
-    @available_if(
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
 
 
@@ -644,13 +671,21 @@ class ElasticNetCV(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-
-
-
-
-
-
-
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+
+
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
+
 
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
```
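
For reference, the fallback output-type inference added to each `transform` wrapper above reduces to the following standalone logic. This is a sketch only: plain strings stand in for the Snowpark type objects and for `convert_sp_to_sf_type`, and `infer_expected_dtype` is a hypothetical name, not a function in the package.

```python
from typing import List, Sequence


def infer_expected_dtype(estimator: object, output_cols: Sequence[str], output_types: List[str]) -> str:
    """Sketch of the 1.2.1 fallback: pick one concrete type, else "" (variant/ARRAY)."""
    # Clustering transformers: if output columns don't match n_clusters,
    # each row holds a list of values, so the response is an ARRAY.
    if hasattr(estimator, "n_clusters") and getattr(estimator, "n_clusters") != len(output_cols):
        return "ARRAY"
    # Decomposition transformers: same rule against n_components.
    if hasattr(estimator, "n_components") and getattr(estimator, "n_components") != len(output_cols):
        return "ARRAY"
    # A single concrete type is only safe when every inferred input type agrees
    # and the input/output column counts line up; otherwise stay with variant.
    if output_types and all(t == output_types[0] for t in output_types) and len(output_types) == len(output_cols):
        return output_types[0]
    return ""


# Homogeneous inputs with matching column counts keep a concrete type:
print(infer_expected_dtype(object(), ["OUT1", "OUT2"], ["DOUBLE", "DOUBLE"]))  # DOUBLE
print(infer_expected_dtype(object(), ["OUT1"], ["DOUBLE", "VARCHAR"]))         # ""
```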