snowflake-ml-python 1.1.2__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/{model/_deploy_client/utils → _internal/container_services/image_registry}/imagelib.py +3 -1
- snowflake/ml/{model/_deploy_client/utils/image_registry_client.py → _internal/container_services/image_registry/registry_client.py} +4 -2
- snowflake/ml/_internal/env_utils.py +31 -52
- snowflake/ml/_internal/file_utils.py +17 -0
- snowflake/ml/_internal/telemetry.py +19 -0
- snowflake/ml/_internal/utils/query_result_checker.py +8 -5
- snowflake/ml/_internal/utils/snowflake_env.py +95 -0
- snowflake/ml/fileset/parquet_parser.py +31 -1
- snowflake/ml/model/__init__.py +6 -0
- snowflake/ml/model/_client/model/model_impl.py +172 -13
- snowflake/ml/model/_client/model/model_version_impl.py +96 -52
- snowflake/ml/model/_client/ops/metadata_ops.py +1 -3
- snowflake/ml/model/_client/ops/model_ops.py +155 -9
- snowflake/ml/model/_client/sql/model.py +55 -10
- snowflake/ml/model/_client/sql/model_version.py +72 -61
- snowflake/ml/model/_client/sql/stage.py +10 -4
- snowflake/ml/model/_client/sql/tag.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +2 -2
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +8 -8
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +4 -6
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +6 -7
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +4 -5
- snowflake/ml/model/_deploy_client/snowservice/instance_types.py +9 -1
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +20 -11
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +45 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +30 -0
- snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -1
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +10 -7
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +1 -1
- snowflake/ml/model/_packager/model_handlers/xgboost.py +13 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
- snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
- snowflake/ml/model/_signatures/core.py +20 -17
- snowflake/ml/model/custom_model.py +30 -27
- snowflake/ml/model/model_signature.py +16 -17
- snowflake/ml/model/type_hints.py +3 -0
- snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +185 -98
- snowflake/ml/modeling/_internal/estimator_utils.py +21 -0
- snowflake/ml/modeling/_internal/model_specifications.py +3 -10
- snowflake/ml/modeling/_internal/model_trainer_builder.py +55 -11
- snowflake/ml/modeling/_internal/snowpark_handlers.py +9 -6
- snowflake/ml/modeling/_internal/snowpark_trainer.py +10 -2
- snowflake/ml/modeling/_internal/xgboost_external_memory_trainer.py +444 -0
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -16
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -16
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -16
- snowflake/ml/modeling/cluster/birch.py +51 -16
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -16
- snowflake/ml/modeling/cluster/dbscan.py +51 -16
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -16
- snowflake/ml/modeling/cluster/k_means.py +51 -16
- snowflake/ml/modeling/cluster/mean_shift.py +51 -16
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -16
- snowflake/ml/modeling/cluster/optics.py +51 -16
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -16
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -16
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -16
- snowflake/ml/modeling/compose/column_transformer.py +51 -16
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -16
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -16
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -16
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -16
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -16
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -16
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -16
- snowflake/ml/modeling/covariance/oas.py +51 -16
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -16
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -16
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -16
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -16
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -16
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -16
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -16
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -16
- snowflake/ml/modeling/decomposition/pca.py +51 -16
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -16
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -16
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -16
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -16
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -16
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -16
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -16
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -16
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -16
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -16
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -16
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -16
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -16
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -16
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -16
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -16
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -16
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -16
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -16
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -16
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -16
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -16
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -16
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -16
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -16
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -16
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -16
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -16
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -16
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -16
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -16
- snowflake/ml/modeling/impute/knn_imputer.py +51 -16
- snowflake/ml/modeling/impute/missing_indicator.py +51 -16
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -16
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -16
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -16
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -16
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -16
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -16
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -16
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -16
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -16
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -16
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -16
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -16
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -16
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -16
- snowflake/ml/modeling/linear_model/lars.py +51 -16
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -16
- snowflake/ml/modeling/linear_model/lasso.py +51 -16
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -16
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -16
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -16
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -16
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -16
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -16
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -16
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -16
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -16
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -16
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -16
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -16
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -16
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -16
- snowflake/ml/modeling/linear_model/perceptron.py +51 -16
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -16
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -16
- snowflake/ml/modeling/linear_model/ridge.py +51 -16
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -16
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -16
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -16
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -16
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -16
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -16
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -16
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -16
- snowflake/ml/modeling/manifold/isomap.py +51 -16
- snowflake/ml/modeling/manifold/mds.py +51 -16
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -16
- snowflake/ml/modeling/manifold/tsne.py +51 -16
- snowflake/ml/modeling/metrics/classification.py +5 -6
- snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
- snowflake/ml/modeling/metrics/ranking.py +7 -3
- snowflake/ml/modeling/metrics/regression.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -16
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -16
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -16
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -16
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -16
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -16
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -16
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -16
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -16
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -16
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -16
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -16
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -16
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -16
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -16
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -16
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -16
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -16
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -16
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -16
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -16
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -16
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +15 -1
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -16
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -16
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -16
- snowflake/ml/modeling/svm/linear_svc.py +51 -16
- snowflake/ml/modeling/svm/linear_svr.py +51 -16
- snowflake/ml/modeling/svm/nu_svc.py +51 -16
- snowflake/ml/modeling/svm/nu_svr.py +51 -16
- snowflake/ml/modeling/svm/svc.py +51 -16
- snowflake/ml/modeling/svm/svr.py +51 -16
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -16
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -16
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -16
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -16
- snowflake/ml/modeling/xgboost/xgb_classifier.py +69 -16
- snowflake/ml/modeling/xgboost/xgb_regressor.py +69 -16
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +69 -16
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +69 -16
- snowflake/ml/registry/__init__.py +3 -0
- snowflake/ml/registry/_manager/model_manager.py +163 -0
- snowflake/ml/registry/model_registry.py +12 -0
- snowflake/ml/registry/registry.py +100 -90
- snowflake/ml/version.py +1 -1
- snowflake_ml_python-1.2.1.dist-info/LICENSE.txt +202 -0
- {snowflake_ml_python-1.1.2.dist-info → snowflake_ml_python-1.2.1.dist-info}/METADATA +295 -60
- snowflake_ml_python-1.2.1.dist-info/RECORD +355 -0
- {snowflake_ml_python-1.1.2.dist-info → snowflake_ml_python-1.2.1.dist-info}/WHEEL +2 -1
- snowflake_ml_python-1.2.1.dist-info/top_level.txt +1 -0
- snowflake/ml/model/_client/model/model_method_info.py +0 -19
- snowflake_ml_python-1.1.2.dist-info/RECORD +0 -347
- /snowflake/ml/_internal/{utils/spcs_image_registry.py → container_services/image_registry/credential.py} +0 -0
- /snowflake/ml/_internal/{utils/image_registry_http_client.py → container_services/image_registry/http_client.py} +0 -0
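
The headline change in this range is the reworked model registry: `snowflake/ml/registry/registry.py` was largely rewritten, a new `snowflake/ml/registry/_manager/model_manager.py` was added, and tag support arrived via `snowflake/ml/model/_client/sql/tag.py`. As a hedged illustration of the 1.2.x-style registry workflow, here is a minimal sketch; the Snowpark `session`, the sklearn demo model, and the exact keyword names are assumptions based on the public 1.2.x API, not contents of this diff.

```python
# Hedged sketch: logging and invoking a model with the reworked registry.
# Assumes an already-authenticated snowflake.snowpark.Session named `session`;
# model and version names are placeholders.
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

from snowflake.ml.registry import Registry

X, y = load_iris(return_X_y=True)
clf = LogisticRegression(max_iter=200).fit(X, y)

reg = Registry(session=session)
mv = reg.log_model(
    clf,
    model_name="IRIS_CLF",        # placeholder name
    version_name="V1",            # placeholder version
    sample_input_data=X[:10],     # used to infer the model signature
)
print(mv.run(X[:5], function_name="predict"))
```

The hunks below show the one change this release applies uniformly to the autogenerated `naive_bayes` wrappers; the same +51/-16 pattern repeats across the other `snowflake/ml/modeling` files listed above.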
snowflake/ml/modeling/naive_bayes/bernoulli_nb.py

```diff
@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.naive_bayes".replace("sklearn.", "").split("_")])
 
 
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+
+
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
+
+
 class BernoulliNB(BaseTransformer):
     r"""Naive Bayes classifier for multivariate Bernoulli models
     For more details on this class, see [sklearn.naive_bayes.BernoulliNB]
@@ -150,7 +162,9 @@ class BernoulliNB(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -230,11 +244,6 @@ class BernoulliNB(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
-
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -262,7 +271,9 @@ class BernoulliNB(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -533,6 +544,22 @@ class BernoulliNB(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -548,8 +575,8 @@ class BernoulliNB(BaseTransformer):
 
         return output_df
 
-    @available_if(
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
 
 
@@ -562,13 +589,21 @@ class BernoulliNB(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-
-
-
-
-
-
-
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+
+
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
+
 
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
```
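
The new `_is_fit_predict_method_enabled`/`_is_fit_transform_method_enabled` factories return a check for scikit-learn's `available_if` descriptor, which hides the decorated method (even from `hasattr`) whenever the check returns falsy; since the generated check is hard-coded to `False and ...`, these naive Bayes wrappers never expose `fit_predict` or `fit_transform`. A self-contained sketch of the pattern using only public scikit-learn APIs, with the factory indirection dropped for brevity (`Wrapper`, `has_fit_predict`, and `_inner` are illustrative names, not snowflake-ml-python code):

```python
from sklearn.cluster import KMeans
from sklearn.utils.metaestimators import available_if


def has_fit_predict(self) -> bool:
    # Mirrors the generated check: `False and ...` always fails, so the
    # decorated method is disabled regardless of the wrapped estimator.
    return False and callable(getattr(self._inner, "fit_predict", None))


class Wrapper:
    def __init__(self, inner):
        self._inner = inner

    @available_if(has_fit_predict)
    def fit_predict(self, X):
        return self._inner.fit_predict(X)


w = Wrapper(KMeans(n_clusters=2))
print(hasattr(w, "fit_predict"))  # False: available_if raises AttributeError
```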
snowflake/ml/modeling/naive_bayes/categorical_nb.py

```diff
@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.naive_bayes".replace("sklearn.", "").split("_")])
 
 
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+
+
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
+
+
 class CategoricalNB(BaseTransformer):
     r"""Naive Bayes classifier for categorical features
     For more details on this class, see [sklearn.naive_bayes.CategoricalNB]
@@ -156,7 +168,9 @@ class CategoricalNB(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -236,11 +250,6 @@ class CategoricalNB(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
-
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -268,7 +277,9 @@ class CategoricalNB(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -539,6 +550,22 @@ class CategoricalNB(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -554,8 +581,8 @@ class CategoricalNB(BaseTransformer):
 
         return output_df
 
-    @available_if(
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
 
 
@@ -568,13 +595,21 @@ class CategoricalNB(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-
-
-
-
-
-
-
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+
+
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
+
 
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
```
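
These hunks also add an output-type fallback for transforms the factory could not type: clustering or decomposition estimators whose output-column count does not match `n_clusters`/`n_components` force `ARRAY`, and otherwise a concrete Snowflake type is chosen only when every inferred output type agrees and the counts line up. A simplified, runnable sketch of that last rule, with plain strings standing in for Snowpark types (`infer_expected_dtype` is an illustrative name, not package code):

```python
def infer_expected_dtype(output_types, n_output_cols, expected_dtype=""):
    # Mirrors the final `else` branch above: commit to one concrete type only
    # when every inferred output type agrees and there is one output column
    # per type; the clustering/decomposition "ARRAY" branches are elided.
    if expected_dtype == "" and output_types:
        if all(t == output_types[0] for t in output_types) and len(output_types) == n_output_cols:
            expected_dtype = output_types[0]
    return expected_dtype


print(infer_expected_dtype(["DOUBLE", "DOUBLE"], 2))  # -> DOUBLE
print(infer_expected_dtype(["DOUBLE", "STRING"], 2))  # -> "" (unresolved; variant handling applies)
```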
snowflake/ml/modeling/naive_bayes/complement_nb.py

```diff
@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.naive_bayes".replace("sklearn.", "").split("_")])
 
 
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+
+
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
+
+
 class ComplementNB(BaseTransformer):
     r"""The Complement Naive Bayes classifier described in Rennie et al
     For more details on this class, see [sklearn.naive_bayes.ComplementNB]
@@ -150,7 +162,9 @@ class ComplementNB(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -230,11 +244,6 @@ class ComplementNB(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
-
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -262,7 +271,9 @@ class ComplementNB(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -533,6 +544,22 @@ class ComplementNB(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -548,8 +575,8 @@ class ComplementNB(BaseTransformer):
 
         return output_df
 
-    @available_if(
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
 
 
@@ -562,13 +589,21 @@ class ComplementNB(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-
-
-
-
-
-
-
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+
+
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
+
 
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
```
snowflake/ml/modeling/naive_bayes/gaussian_nb.py

```diff
@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.naive_bayes".replace("sklearn.", "").split("_")])
 
 
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+
+
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
+
+
 class GaussianNB(BaseTransformer):
     r"""Gaussian Naive Bayes (GaussianNB)
     For more details on this class, see [sklearn.naive_bayes.GaussianNB]
@@ -134,7 +146,9 @@ class GaussianNB(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -211,11 +225,6 @@ class GaussianNB(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
-
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -243,7 +252,9 @@ class GaussianNB(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -514,6 +525,22 @@ class GaussianNB(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -529,8 +556,8 @@ class GaussianNB(BaseTransformer):
 
         return output_df
 
-    @available_if(
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
 
 
@@ -543,13 +570,21 @@ class GaussianNB(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-
-
-
-
-
-
-
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+
+
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
+
 
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
```
snowflake/ml/modeling/naive_bayes/multinomial_nb.py

```diff
@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.naive_bayes".replace("sklearn.", "").split("_")])
 
 
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+
+
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
+
+
 class MultinomialNB(BaseTransformer):
     r"""Naive Bayes classifier for multinomial models
     For more details on this class, see [sklearn.naive_bayes.MultinomialNB]
@@ -145,7 +157,9 @@ class MultinomialNB(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -224,11 +238,6 @@ class MultinomialNB(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
-
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -256,7 +265,9 @@ class MultinomialNB(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -527,6 +538,22 @@ class MultinomialNB(BaseTransformer):
         # each row containing a list of values.
         expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statemetns are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
@@ -542,8 +569,8 @@ class MultinomialNB(BaseTransformer):
 
         return output_df
 
-    @available_if(
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
 
 
@@ -556,13 +583,21 @@ class MultinomialNB(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-
-
-
-
-
-
-
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+
+
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
+
 
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
```
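
Each constructor in these hunks now sets `_use_external_memory_version = False` and `_batch_size = -1` and threads both through to the trainer builder; the corresponding machinery lands in the new `snowflake/ml/modeling/_internal/xgboost_external_memory_trainer.py` (+444 lines) and the widened `model_trainer_builder.py` in the file list above. For orientation, here is a rough sketch of what batched, external-memory XGBoost training looks like, built only on public XGBoost APIs (the iterator class, synthetic data, and cache path are assumptions; this is not the snowflake-ml-python trainer itself):

```python
import numpy as np
import xgboost as xgb


class BatchIter(xgb.DataIter):
    """Feeds fixed-size batches so the full dataset never sits in memory."""

    def __init__(self, batches):
        self._batches = batches
        self._it = 0
        super().__init__(cache_prefix="./xgb_cache")  # spill pages to disk

    def next(self, input_data):
        if self._it == len(self._batches):
            return 0  # no more batches: end of one pass over the data
        X, y = self._batches[self._it]
        input_data(data=X, label=y)
        self._it += 1
        return 1

    def reset(self):
        self._it = 0  # called by XGBoost before each new pass


rng = np.random.default_rng(0)
batches = [(rng.normal(size=(256, 4)), rng.integers(0, 2, 256)) for _ in range(4)]
dtrain = xgb.DMatrix(BatchIter(batches))  # external-memory DMatrix
booster = xgb.train({"objective": "binary:logistic"}, dtrain, num_boost_round=5)
print(booster.num_boosted_rounds())
```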