PyPI - snowflake-ml-python - Versions diffs - 1.1.2__py3-none-any.whl → 1.2.1__py3-none-any.whl - Mend

snowflake-ml-python 1.1.2 (py3-none-any.whl) → 1.2.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (215)

snowflake/ml/{model/_deploy_client/utils → _internal/container_services/image_registry}/imagelib.py +3 -1
snowflake/ml/{model/_deploy_client/utils/image_registry_client.py → _internal/container_services/image_registry/registry_client.py} +4 -2
snowflake/ml/_internal/env_utils.py +31 -52
snowflake/ml/_internal/file_utils.py +17 -0
snowflake/ml/_internal/telemetry.py +19 -0
snowflake/ml/_internal/utils/query_result_checker.py +8 -5
snowflake/ml/_internal/utils/snowflake_env.py +95 -0
snowflake/ml/fileset/parquet_parser.py +31 -1
snowflake/ml/model/__init__.py +6 -0
snowflake/ml/model/_client/model/model_impl.py +172 -13
snowflake/ml/model/_client/model/model_version_impl.py +96 -52
snowflake/ml/model/_client/ops/metadata_ops.py +1 -3
snowflake/ml/model/_client/ops/model_ops.py +155 -9
snowflake/ml/model/_client/sql/model.py +55 -10
snowflake/ml/model/_client/sql/model_version.py +72 -61
snowflake/ml/model/_client/sql/stage.py +10 -4
snowflake/ml/model/_client/sql/tag.py +118 -0
snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +2 -2
snowflake/ml/model/_deploy_client/image_builds/docker_context.py +8 -8
snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +4 -6
snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +6 -7
snowflake/ml/model/_deploy_client/snowservice/deploy.py +4 -5
snowflake/ml/model/_deploy_client/snowservice/instance_types.py +9 -1
snowflake/ml/model/_deploy_client/warehouse/deploy.py +20 -11
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +45 -1
snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +30 -0
snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -1
snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +10 -7
snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +1 -1
snowflake/ml/model/_packager/model_handlers/xgboost.py +13 -2
snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
snowflake/ml/model/_signatures/core.py +20 -17
snowflake/ml/model/custom_model.py +30 -27
snowflake/ml/model/model_signature.py +16 -17
snowflake/ml/model/type_hints.py +3 -0
snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +185 -98
snowflake/ml/modeling/_internal/estimator_utils.py +21 -0
snowflake/ml/modeling/_internal/model_specifications.py +3 -10
snowflake/ml/modeling/_internal/model_trainer_builder.py +55 -11
snowflake/ml/modeling/_internal/snowpark_handlers.py +9 -6
snowflake/ml/modeling/_internal/snowpark_trainer.py +10 -2
snowflake/ml/modeling/_internal/xgboost_external_memory_trainer.py +444 -0
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -16
snowflake/ml/modeling/cluster/affinity_propagation.py +51 -16
snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -16
snowflake/ml/modeling/cluster/birch.py +51 -16
snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -16
snowflake/ml/modeling/cluster/dbscan.py +51 -16
snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -16
snowflake/ml/modeling/cluster/k_means.py +51 -16
snowflake/ml/modeling/cluster/mean_shift.py +51 -16
snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -16
snowflake/ml/modeling/cluster/optics.py +51 -16
snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -16
snowflake/ml/modeling/cluster/spectral_clustering.py +51 -16
snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -16
snowflake/ml/modeling/compose/column_transformer.py +51 -16
snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -16
snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -16
snowflake/ml/modeling/covariance/empirical_covariance.py +51 -16
snowflake/ml/modeling/covariance/graphical_lasso.py +51 -16
snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -16
snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -16
snowflake/ml/modeling/covariance/min_cov_det.py +51 -16
snowflake/ml/modeling/covariance/oas.py +51 -16
snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -16
snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -16
snowflake/ml/modeling/decomposition/factor_analysis.py +51 -16
snowflake/ml/modeling/decomposition/fast_ica.py +51 -16
snowflake/ml/modeling/decomposition/incremental_pca.py +51 -16
snowflake/ml/modeling/decomposition/kernel_pca.py +51 -16
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -16
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -16
snowflake/ml/modeling/decomposition/pca.py +51 -16
snowflake/ml/modeling/decomposition/sparse_pca.py +51 -16
snowflake/ml/modeling/decomposition/truncated_svd.py +51 -16
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -16
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -16
snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -16
snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -16
snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -16
snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -16
snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -16
snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -16
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -16
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -16
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -16
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -16
snowflake/ml/modeling/ensemble/isolation_forest.py +51 -16
snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -16
snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -16
snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -16
snowflake/ml/modeling/ensemble/voting_classifier.py +51 -16
snowflake/ml/modeling/ensemble/voting_regressor.py +51 -16
snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -16
snowflake/ml/modeling/feature_selection/select_fdr.py +51 -16
snowflake/ml/modeling/feature_selection/select_fpr.py +51 -16
snowflake/ml/modeling/feature_selection/select_fwe.py +51 -16
snowflake/ml/modeling/feature_selection/select_k_best.py +51 -16
snowflake/ml/modeling/feature_selection/select_percentile.py +51 -16
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -16
snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -16
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -16
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -16
snowflake/ml/modeling/impute/iterative_imputer.py +51 -16
snowflake/ml/modeling/impute/knn_imputer.py +51 -16
snowflake/ml/modeling/impute/missing_indicator.py +51 -16
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -16
snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -16
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -16
snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -16
snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -16
snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -16
snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -16
snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -16
snowflake/ml/modeling/linear_model/ard_regression.py +51 -16
snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -16
snowflake/ml/modeling/linear_model/elastic_net.py +51 -16
snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -16
snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -16
snowflake/ml/modeling/linear_model/huber_regressor.py +51 -16
snowflake/ml/modeling/linear_model/lars.py +51 -16
snowflake/ml/modeling/linear_model/lars_cv.py +51 -16
snowflake/ml/modeling/linear_model/lasso.py +51 -16
snowflake/ml/modeling/linear_model/lasso_cv.py +51 -16
snowflake/ml/modeling/linear_model/lasso_lars.py +51 -16
snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -16
snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -16
snowflake/ml/modeling/linear_model/linear_regression.py +51 -16
snowflake/ml/modeling/linear_model/logistic_regression.py +51 -16
snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -16
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -16
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -16
snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -16
snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -16
snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -16
snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -16
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -16
snowflake/ml/modeling/linear_model/perceptron.py +51 -16
snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -16
snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -16
snowflake/ml/modeling/linear_model/ridge.py +51 -16
snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -16
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -16
snowflake/ml/modeling/linear_model/ridge_cv.py +51 -16
snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -16
snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -16
snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -16
snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -16
snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -16
snowflake/ml/modeling/manifold/isomap.py +51 -16
snowflake/ml/modeling/manifold/mds.py +51 -16
snowflake/ml/modeling/manifold/spectral_embedding.py +51 -16
snowflake/ml/modeling/manifold/tsne.py +51 -16
snowflake/ml/modeling/metrics/classification.py +5 -6
snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
snowflake/ml/modeling/metrics/ranking.py +7 -3
snowflake/ml/modeling/metrics/regression.py +6 -3
snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -16
snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -16
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -16
snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -16
snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -16
snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -16
snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -16
snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -16
snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -16
snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -16
snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -16
snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -16
snowflake/ml/modeling/neighbors/kernel_density.py +51 -16
snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -16
snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -16
snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -16
snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -16
snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -16
snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -16
snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -16
snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -16
snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -16
snowflake/ml/modeling/preprocessing/min_max_scaler.py +15 -1
snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -16
snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -16
snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -16
snowflake/ml/modeling/svm/linear_svc.py +51 -16
snowflake/ml/modeling/svm/linear_svr.py +51 -16
snowflake/ml/modeling/svm/nu_svc.py +51 -16
snowflake/ml/modeling/svm/nu_svr.py +51 -16
snowflake/ml/modeling/svm/svc.py +51 -16
snowflake/ml/modeling/svm/svr.py +51 -16
snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -16
snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -16
snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -16
snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -16
snowflake/ml/modeling/xgboost/xgb_classifier.py +69 -16
snowflake/ml/modeling/xgboost/xgb_regressor.py +69 -16
snowflake/ml/modeling/xgboost/xgbrf_classifier.py +69 -16
snowflake/ml/modeling/xgboost/xgbrf_regressor.py +69 -16
snowflake/ml/registry/__init__.py +3 -0
snowflake/ml/registry/_manager/model_manager.py +163 -0
snowflake/ml/registry/model_registry.py +12 -0
snowflake/ml/registry/registry.py +100 -90
snowflake/ml/version.py +1 -1
snowflake_ml_python-1.2.1.dist-info/LICENSE.txt +202 -0
{snowflake_ml_python-1.1.2.dist-info → snowflake_ml_python-1.2.1.dist-info}/METADATA +295 -60
snowflake_ml_python-1.2.1.dist-info/RECORD +355 -0
{snowflake_ml_python-1.1.2.dist-info → snowflake_ml_python-1.2.1.dist-info}/WHEEL +2 -1
snowflake_ml_python-1.2.1.dist-info/top_level.txt +1 -0
snowflake/ml/model/_client/model/model_method_info.py +0 -19
snowflake_ml_python-1.1.2.dist-info/RECORD +0 -347
/snowflake/ml/_internal/{utils/spcs_image_registry.py → container_services/image_registry/credential.py} +0 -0
/snowflake/ml/_internal/{utils/image_registry_http_client.py → container_services/image_registry/http_client.py} +0 -0

snowflake/ml/modeling/linear_model/poisson_regressor.py CHANGED Viewed

@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("sklearn.", "").split("_")])
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
 class PoissonRegressor(BaseTransformer):
     r"""Generalized Linear Model with a Poisson distribution
     For more details on this class, see [sklearn.linear_model.PoissonRegressor]
@@ -175,7 +187,9 @@ class PoissonRegressor(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         self._deps = list(deps)
@@ -257,11 +271,6 @@ class PoissonRegressor(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -289,7 +298,9 @@ class PoissonRegressor(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -560,6 +571,22 @@ class PoissonRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -575,8 +602,8 @@ class PoissonRegressor(BaseTransformer):
         return output_df
-    @available_if(original_estimator_has_callable("fit_predict"))  # type: ignore[misc]
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
@@ -589,13 +616,21 @@ class PoissonRegressor(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-        if False:
-            self.fit(dataset)
-            assert self._sklearn_object is not None
-            labels : npt.NDArray[Any] = self._sklearn_object.labels_
-            return labels
-        else:
-            raise NotImplementedError
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.

snowflake/ml/modeling/linear_model/ransac_regressor.py CHANGED Viewed

@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("sklearn.", "").split("_")])
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
 class RANSACRegressor(BaseTransformer):
     r"""RANSAC (RANdom SAmple Consensus) algorithm
     For more details on this class, see [sklearn.linear_model.RANSACRegressor]
@@ -226,7 +238,9 @@ class RANSACRegressor(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | gather_dependencies(estimator)
         self._deps = list(deps)
@@ -313,11 +327,6 @@ class RANSACRegressor(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -345,7 +354,9 @@ class RANSACRegressor(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -616,6 +627,22 @@ class RANSACRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -631,8 +658,8 @@ class RANSACRegressor(BaseTransformer):
         return output_df
-    @available_if(original_estimator_has_callable("fit_predict"))  # type: ignore[misc]
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
@@ -645,13 +672,21 @@ class RANSACRegressor(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-        if False:
-            self.fit(dataset)
-            assert self._sklearn_object is not None
-            labels : npt.NDArray[Any] = self._sklearn_object.labels_
-            return labels
-        else:
-            raise NotImplementedError
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.

snowflake/ml/modeling/linear_model/ridge.py CHANGED Viewed

@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("sklearn.", "").split("_")])
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
 class Ridge(BaseTransformer):
     r"""Linear least squares with l2 regularization
     For more details on this class, see [sklearn.linear_model.Ridge]
@@ -222,7 +234,9 @@ class Ridge(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         self._deps = list(deps)
@@ -305,11 +319,6 @@ class Ridge(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -337,7 +346,9 @@ class Ridge(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -608,6 +619,22 @@ class Ridge(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statemetns are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -623,8 +650,8 @@ class Ridge(BaseTransformer):
         return output_df
-    @available_if(original_estimator_has_callable("fit_predict"))  # type: ignore[misc]
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
@@ -637,13 +664,21 @@ class Ridge(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-        if False:
-            self.fit(dataset)
-            assert self._sklearn_object is not None
-            labels : npt.NDArray[Any] = self._sklearn_object.labels_
-            return labels
-        else:
-            raise NotImplementedError
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.

snowflake/ml/modeling/linear_model/ridge_classifier.py CHANGED Viewed

@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("sklearn.", "").split("_")])
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
 class RidgeClassifier(BaseTransformer):
     r"""Classifier using Ridge regression
     For more details on this class, see [sklearn.linear_model.RidgeClassifier]
@@ -221,7 +233,9 @@ class RidgeClassifier(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         self._deps = list(deps)
@@ -305,11 +319,6 @@ class RidgeClassifier(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -337,7 +346,9 @@ class RidgeClassifier(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -608,6 +619,22 @@ class RidgeClassifier(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -623,8 +650,8 @@ class RidgeClassifier(BaseTransformer):
         return output_df
-    @available_if(original_estimator_has_callable("fit_predict"))  # type: ignore[misc]
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
@@ -637,13 +664,21 @@ class RidgeClassifier(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-        if False:
-            self.fit(dataset)
-            assert self._sklearn_object is not None
-            labels : npt.NDArray[Any] = self._sklearn_object.labels_
-            return labels
-        else:
-            raise NotImplementedError
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.

snowflake/ml/modeling/linear_model/ridge_classifier_cv.py CHANGED Viewed

@@ -54,6 +54,18 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("sklearn.", "").split("_")])
+def _is_fit_predict_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_predict", None))
+    return check
+def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
+    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
+        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
+    return check
 class RidgeClassifierCV(BaseTransformer):
     r"""Ridge classifier with built-in cross-validation
     For more details on this class, see [sklearn.linear_model.RidgeClassifierCV]
@@ -175,7 +187,9 @@ class RidgeClassifierCV(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_passthrough_cols(passthrough_cols)
         self.set_drop_input_cols(drop_input_cols)
-        self.set_sample_weight_col(sample_weight_col)
+        self.set_sample_weight_col(sample_weight_col)
+        self._use_external_memory_version = False
+        self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         self._deps = list(deps)
@@ -256,11 +270,6 @@ class RidgeClassifierCV(BaseTransformer):
         if isinstance(dataset, DataFrame):
             session = dataset._session
             assert session is not None  # keep mypy happy
-            # Validate that key package version in user workspace are supported in snowflake conda channel
-            # If customer doesn't have package in conda channel, replace the ones have the closest versions
-            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
             # Specify input columns so column pruning will be enforced
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
@@ -288,7 +297,9 @@ class RidgeClassifierCV(BaseTransformer):
             label_cols=self.label_cols,
             sample_weight_col=self.sample_weight_col,
             autogenerated=self._autogenerated,
-            subproject=_SUBPROJECT
+            subproject=_SUBPROJECT,
+            use_external_memory_version=self._use_external_memory_version,
+            batch_size=self._batch_size,
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
@@ -559,6 +570,22 @@ class RidgeClassifierCV(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -574,8 +601,8 @@ class RidgeClassifierCV(BaseTransformer):
         return output_df
-    @available_if(original_estimator_has_callable("fit_predict"))  # type: ignore[misc]
-    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> npt.NDArray[Any]:
+    @available_if(_is_fit_predict_method_enabled())  # type: ignore[misc]
+    def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
         """ Method not supported for this class.
@@ -588,13 +615,21 @@ class RidgeClassifierCV(BaseTransformer):
         Returns:
             Predicted dataset.
         """
-        if False:
-            self.fit(dataset)
-            assert self._sklearn_object is not None
-            labels : npt.NDArray[Any] = self._sklearn_object.labels_
-            return labels
-        else:
-            raise NotImplementedError
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.labels_
+    @available_if(_is_fit_transform_method_enabled())  # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
+        """
+        Returns:
+            Transformed dataset.
+        """
+        self.fit(dataset)
+        assert self._sklearn_object is not None
+        return self._sklearn_object.embedding_
     def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.

snowflake-ml-python 1.1.2__py3-none-any.whl → 1.2.1__py3-none-any.whl

snowflake-ml-python 1.1.2py3-none-any.whl → 1.2.1py3-none-any.whl