PyPI - snowflake-ml-python - Versions diffs - 1.1.2__py3-none-any.whl → 1.2.1__py3-none-any.whl - Mend

snowflake-ml-python: 1.1.2 (py3-none-any.whl) → 1.2.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (215)

snowflake/ml/{model/_deploy_client/utils → _internal/container_services/image_registry}/imagelib.py +3 -1
snowflake/ml/{model/_deploy_client/utils/image_registry_client.py → _internal/container_services/image_registry/registry_client.py} +4 -2
snowflake/ml/_internal/env_utils.py +31 -52
snowflake/ml/_internal/file_utils.py +17 -0
snowflake/ml/_internal/telemetry.py +19 -0
snowflake/ml/_internal/utils/query_result_checker.py +8 -5
snowflake/ml/_internal/utils/snowflake_env.py +95 -0
snowflake/ml/fileset/parquet_parser.py +31 -1
snowflake/ml/model/__init__.py +6 -0
snowflake/ml/model/_client/model/model_impl.py +172 -13
snowflake/ml/model/_client/model/model_version_impl.py +96 -52
snowflake/ml/model/_client/ops/metadata_ops.py +1 -3
snowflake/ml/model/_client/ops/model_ops.py +155 -9
snowflake/ml/model/_client/sql/model.py +55 -10
snowflake/ml/model/_client/sql/model_version.py +72 -61
snowflake/ml/model/_client/sql/stage.py +10 -4
snowflake/ml/model/_client/sql/tag.py +118 -0
snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +2 -2
snowflake/ml/model/_deploy_client/image_builds/docker_context.py +8 -8
snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +4 -6
snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +6 -7
snowflake/ml/model/_deploy_client/snowservice/deploy.py +4 -5
snowflake/ml/model/_deploy_client/snowservice/instance_types.py +9 -1
snowflake/ml/model/_deploy_client/warehouse/deploy.py +20 -11
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +45 -1
snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +30 -0
snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -1
snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +10 -7
snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +1 -1
snowflake/ml/model/_packager/model_handlers/xgboost.py +13 -2
snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
snowflake/ml/model/_signatures/core.py +20 -17
snowflake/ml/model/custom_model.py +30 -27
snowflake/ml/model/model_signature.py +16 -17
snowflake/ml/model/type_hints.py +3 -0
snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +185 -98
snowflake/ml/modeling/_internal/estimator_utils.py +21 -0
snowflake/ml/modeling/_internal/model_specifications.py +3 -10
snowflake/ml/modeling/_internal/model_trainer_builder.py +55 -11
snowflake/ml/modeling/_internal/snowpark_handlers.py +9 -6
snowflake/ml/modeling/_internal/snowpark_trainer.py +10 -2
snowflake/ml/modeling/_internal/xgboost_external_memory_trainer.py +444 -0
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -16
snowflake/ml/modeling/cluster/affinity_propagation.py +51 -16
snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -16
snowflake/ml/modeling/cluster/birch.py +51 -16
snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -16
snowflake/ml/modeling/cluster/dbscan.py +51 -16
snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -16
snowflake/ml/modeling/cluster/k_means.py +51 -16
snowflake/ml/modeling/cluster/mean_shift.py +51 -16
snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -16
snowflake/ml/modeling/cluster/optics.py +51 -16
snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -16
snowflake/ml/modeling/cluster/spectral_clustering.py +51 -16
snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -16
snowflake/ml/modeling/compose/column_transformer.py +51 -16
snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -16
snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -16
snowflake/ml/modeling/covariance/empirical_covariance.py +51 -16
snowflake/ml/modeling/covariance/graphical_lasso.py +51 -16
snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -16
snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -16
snowflake/ml/modeling/covariance/min_cov_det.py +51 -16
snowflake/ml/modeling/covariance/oas.py +51 -16
snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -16
snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -16
snowflake/ml/modeling/decomposition/factor_analysis.py +51 -16
snowflake/ml/modeling/decomposition/fast_ica.py +51 -16
snowflake/ml/modeling/decomposition/incremental_pca.py +51 -16
snowflake/ml/modeling/decomposition/kernel_pca.py +51 -16
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -16
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -16
snowflake/ml/modeling/decomposition/pca.py +51 -16
snowflake/ml/modeling/decomposition/sparse_pca.py +51 -16
snowflake/ml/modeling/decomposition/truncated_svd.py +51 -16
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -16
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -16
snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -16
snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -16
snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -16
snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -16
snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -16
snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -16
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -16
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -16
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -16
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -16
snowflake/ml/modeling/ensemble/isolation_forest.py +51 -16
snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -16
snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -16
snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -16
snowflake/ml/modeling/ensemble/voting_classifier.py +51 -16
snowflake/ml/modeling/ensemble/voting_regressor.py +51 -16
snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -16
snowflake/ml/modeling/feature_selection/select_fdr.py +51 -16
snowflake/ml/modeling/feature_selection/select_fpr.py +51 -16
snowflake/ml/modeling/feature_selection/select_fwe.py +51 -16
snowflake/ml/modeling/feature_selection/select_k_best.py +51 -16
snowflake/ml/modeling/feature_selection/select_percentile.py +51 -16
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -16
snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -16
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -16
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -16
snowflake/ml/modeling/impute/iterative_imputer.py +51 -16
snowflake/ml/modeling/impute/knn_imputer.py +51 -16
snowflake/ml/modeling/impute/missing_indicator.py +51 -16
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -16
snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -16
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -16
snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -16
snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -16
snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -16
snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -16
snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -16
snowflake/ml/modeling/linear_model/ard_regression.py +51 -16
snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -16
snowflake/ml/modeling/linear_model/elastic_net.py +51 -16
snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -16
snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -16
snowflake/ml/modeling/linear_model/huber_regressor.py +51 -16
snowflake/ml/modeling/linear_model/lars.py +51 -16
snowflake/ml/modeling/linear_model/lars_cv.py +51 -16
snowflake/ml/modeling/linear_model/lasso.py +51 -16
snowflake/ml/modeling/linear_model/lasso_cv.py +51 -16
snowflake/ml/modeling/linear_model/lasso_lars.py +51 -16
snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -16
snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -16
snowflake/ml/modeling/linear_model/linear_regression.py +51 -16
snowflake/ml/modeling/linear_model/logistic_regression.py +51 -16
snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -16
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -16
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -16
snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -16
snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -16
snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -16
snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -16
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -16
snowflake/ml/modeling/linear_model/perceptron.py +51 -16
snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -16
snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -16
snowflake/ml/modeling/linear_model/ridge.py +51 -16
snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -16
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -16
snowflake/ml/modeling/linear_model/ridge_cv.py +51 -16
snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -16
snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -16
snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -16
snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -16
snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -16
snowflake/ml/modeling/manifold/isomap.py +51 -16
snowflake/ml/modeling/manifold/mds.py +51 -16
snowflake/ml/modeling/manifold/spectral_embedding.py +51 -16
snowflake/ml/modeling/manifold/tsne.py +51 -16
snowflake/ml/modeling/metrics/classification.py +5 -6
snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
snowflake/ml/modeling/metrics/ranking.py +7 -3
snowflake/ml/modeling/metrics/regression.py +6 -3
snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -16
snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -16
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -16
snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -16
snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -16
snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -16
snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -16
snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -16
snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -16
snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -16
snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -16
snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -16
snowflake/ml/modeling/neighbors/kernel_density.py +51 -16
snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -16
snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -16
snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -16
snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -16
snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -16
snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -16
snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -16
snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -16
snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -16
snowflake/ml/modeling/preprocessing/min_max_scaler.py +15 -1
snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -16
snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -16
snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -16
snowflake/ml/modeling/svm/linear_svc.py +51 -16
snowflake/ml/modeling/svm/linear_svr.py +51 -16
snowflake/ml/modeling/svm/nu_svc.py +51 -16
snowflake/ml/modeling/svm/nu_svr.py +51 -16
snowflake/ml/modeling/svm/svc.py +51 -16
snowflake/ml/modeling/svm/svr.py +51 -16
snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -16
snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -16
snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -16
snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -16
snowflake/ml/modeling/xgboost/xgb_classifier.py +69 -16
snowflake/ml/modeling/xgboost/xgb_regressor.py +69 -16
snowflake/ml/modeling/xgboost/xgbrf_classifier.py +69 -16
snowflake/ml/modeling/xgboost/xgbrf_regressor.py +69 -16
snowflake/ml/registry/__init__.py +3 -0
snowflake/ml/registry/_manager/model_manager.py +163 -0
snowflake/ml/registry/model_registry.py +12 -0
snowflake/ml/registry/registry.py +100 -90
snowflake/ml/version.py +1 -1
snowflake_ml_python-1.2.1.dist-info/LICENSE.txt +202 -0
{snowflake_ml_python-1.1.2.dist-info → snowflake_ml_python-1.2.1.dist-info}/METADATA +295 -60
snowflake_ml_python-1.2.1.dist-info/RECORD +355 -0
{snowflake_ml_python-1.1.2.dist-info → snowflake_ml_python-1.2.1.dist-info}/WHEEL +2 -1
snowflake_ml_python-1.2.1.dist-info/top_level.txt +1 -0
snowflake/ml/model/_client/model/model_method_info.py +0 -19
snowflake_ml_python-1.1.2.dist-info/RECORD +0 -347
/snowflake/ml/_internal/{utils/spcs_image_registry.py → container_services/image_registry/credential.py} +0 -0
/snowflake/ml/_internal/{utils/image_registry_http_client.py → container_services/image_registry/http_client.py} +0 -0

snowflake/ml/modeling/ensemble/random_forest_classifier.py CHANGED Viewed

@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklearn.", "").split("_")])
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
 class RandomForestClassifier(BaseTransformer):
     r"""A random forest classifier
     For more details on this class, see [sklearn.ensemble.RandomForestClassifier]
@@ -290,7 +302,9 @@ class RandomForestClassifier(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         self._deps = list(deps)
@@ -383,11 +397,6 @@ class RandomForestClassifier(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -415,7 +424,9 @@ class RandomForestClassifier(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -686,6 +697,22 @@ class RandomForestClassifier(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -701,8 +728,8 @@ class RandomForestClassifier(BaseTransformer):
         return output_df
-    @available_if(original_estimator_has_callable("fit_predict"))  # type: ignore[misc]
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
@@ -715,13 +742,21 @@ class RandomForestClassifier(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-        if False:
-            self.fit(dataset)
-            assert self._sklearn_object is not None
-            labels : npt.NDArray[Any] = self._sklearn_object.labels_
-            return labels
-        else:
-            raise NotImplementedError
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.

snowflake/ml/modeling/ensemble/random_forest_regressor.py CHANGED Viewed

@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklearn.", "").split("_")])
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
 class RandomForestRegressor(BaseTransformer):
     r"""A random forest regressor
     For more details on this class, see [sklearn.ensemble.RandomForestRegressor]
@@ -270,7 +282,9 @@ class RandomForestRegressor(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         self._deps = list(deps)
@@ -362,11 +376,6 @@ class RandomForestRegressor(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -394,7 +403,9 @@ class RandomForestRegressor(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -665,6 +676,22 @@ class RandomForestRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -680,8 +707,8 @@ class RandomForestRegressor(BaseTransformer):
         return output_df
-    @available_if(original_estimator_has_callable("fit_predict"))  # type: ignore[misc]
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
@@ -694,13 +721,21 @@ class RandomForestRegressor(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-        if False:
-            self.fit(dataset)
-            assert self._sklearn_object is not None
-            labels : npt.NDArray[Any] = self._sklearn_object.labels_
-            return labels
-        else:
-            raise NotImplementedError
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.

snowflake/ml/modeling/ensemble/stacking_regressor.py CHANGED Viewed

@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklearn.", "").split("_")])
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
 class StackingRegressor(BaseTransformer):
     r"""Stack of estimators with a final regressor
     For more details on this class, see [sklearn.ensemble.StackingRegressor]
@@ -180,7 +192,9 @@ class StackingRegressor(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | gather_dependencies(estimators)
         deps = deps | gather_dependencies(final_estimator)
@@ -263,11 +277,6 @@ class StackingRegressor(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -295,7 +304,9 @@ class StackingRegressor(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -568,6 +579,22 @@ class StackingRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -583,8 +610,8 @@ class StackingRegressor(BaseTransformer):
         return output_df
-    @available_if(original_estimator_has_callable("fit_predict"))  # type: ignore[misc]
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
@@ -597,13 +624,21 @@ class StackingRegressor(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-        if False:
-            self.fit(dataset)
-            assert self._sklearn_object is not None
-            labels : npt.NDArray[Any] = self._sklearn_object.labels_
-            return labels
-        else:
-            raise NotImplementedError
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.

snowflake/ml/modeling/ensemble/voting_classifier.py CHANGED Viewed

@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklearn.", "").split("_")])
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
 class VotingClassifier(BaseTransformer):
     r"""Soft Voting/Majority Rule classifier for unfitted estimators
     For more details on this class, see [sklearn.ensemble.VotingClassifier]
@@ -164,7 +176,9 @@ class VotingClassifier(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | gather_dependencies(estimators)
         self._deps = list(deps)
@@ -245,11 +259,6 @@ class VotingClassifier(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -277,7 +286,9 @@ class VotingClassifier(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -550,6 +561,22 @@ class VotingClassifier(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -565,8 +592,8 @@ class VotingClassifier(BaseTransformer):
         return output_df
-    @available_if(original_estimator_has_callable("fit_predict"))  # type: ignore[misc]
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
@@ -579,13 +606,21 @@ class VotingClassifier(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-        if False:
-            self.fit(dataset)
-            assert self._sklearn_object is not None
-            labels : npt.NDArray[Any] = self._sklearn_object.labels_
-            return labels
-        else:
-            raise NotImplementedError
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.

snowflake/ml/modeling/ensemble/voting_regressor.py CHANGED Viewed

@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklearn.", "").split("_")])
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
 class VotingRegressor(BaseTransformer):
     r"""Prediction voting regressor for unfitted estimators
     For more details on this class, see [sklearn.ensemble.VotingRegressor]
@@ -148,7 +160,9 @@ class VotingRegressor(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | gather_dependencies(estimators)
         self._deps = list(deps)
@@ -227,11 +241,6 @@ class VotingRegressor(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -259,7 +268,9 @@ class VotingRegressor(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -532,6 +543,22 @@ class VotingRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -547,8 +574,8 @@ class VotingRegressor(BaseTransformer):
         return output_df
-    @available_if(original_estimator_has_callable("fit_predict"))  # type: ignore[misc]
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
@@ -561,13 +588,21 @@ class VotingRegressor(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-        if False:
-            self.fit(dataset)
-            assert self._sklearn_object is not None
-            labels : npt.NDArray[Any] = self._sklearn_object.labels_
-            return labels
-        else:
-            raise NotImplementedError
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.

snowflake-ml-python 1.1.2__py3-none-any.whl → 1.2.1__py3-none-any.whl

snowflake-ml-python 1.1.2py3-none-any.whl → 1.2.1py3-none-any.whl