PyPI - snowflake-ml-python - Versions diffs - 1.6.1__py3-none-any.whl → 1.6.3__py3-none-any.whl - Mend

snowflake-ml-python 1.6.1 (py3-none-any.whl) → 1.6.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (284)

snowflake/cortex/__init__.py +4 -0
snowflake/cortex/_classify_text.py +2 -2
snowflake/cortex/_embed_text_1024.py +37 -0
snowflake/cortex/_embed_text_768.py +37 -0
snowflake/cortex/_extract_answer.py +2 -2
snowflake/cortex/_sentiment.py +2 -2
snowflake/cortex/_summarize.py +2 -2
snowflake/cortex/_translate.py +2 -2
snowflake/cortex/_util.py +4 -4
snowflake/ml/_internal/env_utils.py +5 -5
snowflake/ml/_internal/exceptions/error_codes.py +2 -0
snowflake/ml/_internal/telemetry.py +142 -20
snowflake/ml/_internal/utils/db_utils.py +50 -0
snowflake/ml/_internal/utils/identifier.py +48 -11
snowflake/ml/_internal/utils/service_logger.py +63 -0
snowflake/ml/_internal/utils/snowflake_env.py +23 -13
snowflake/ml/_internal/utils/sql_identifier.py +26 -2
snowflake/ml/_internal/utils/table_manager.py +19 -1
snowflake/ml/data/_internal/arrow_ingestor.py +1 -11
snowflake/ml/data/data_connector.py +33 -7
snowflake/ml/data/ingestor_utils.py +20 -10
snowflake/ml/data/torch_utils.py +68 -0
snowflake/ml/dataset/dataset.py +1 -3
snowflake/ml/feature_store/access_manager.py +3 -3
snowflake/ml/feature_store/feature_store.py +60 -19
snowflake/ml/feature_store/feature_view.py +84 -30
snowflake/ml/fileset/embedded_stage_fs.py +1 -1
snowflake/ml/fileset/fileset.py +1 -1
snowflake/ml/fileset/sfcfs.py +9 -3
snowflake/ml/fileset/stage_fs.py +2 -1
snowflake/ml/lineage/lineage_node.py +7 -2
snowflake/ml/model/__init__.py +1 -2
snowflake/ml/model/_client/model/model_version_impl.py +96 -12
snowflake/ml/model/_client/ops/model_ops.py +124 -6
snowflake/ml/model/_client/ops/service_ops.py +309 -9
snowflake/ml/model/_client/service/model_deployment_spec.py +8 -5
snowflake/ml/model/_client/service/model_deployment_spec_schema.py +2 -2
snowflake/ml/model/_client/sql/_base.py +5 -0
snowflake/ml/model/_client/sql/model.py +1 -0
snowflake/ml/model/_client/sql/model_version.py +9 -5
snowflake/ml/model/_client/sql/service.py +121 -20
snowflake/ml/model/_model_composer/model_composer.py +11 -39
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +31 -11
snowflake/ml/model/_packager/model_env/model_env.py +4 -38
snowflake/ml/model/_packager/model_handlers/_utils.py +134 -28
snowflake/ml/model/_packager/model_handlers/catboost.py +31 -30
snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +26 -18
snowflake/ml/model/_packager/model_handlers/lightgbm.py +31 -58
snowflake/ml/model/_packager/model_handlers/mlflow.py +3 -5
snowflake/ml/model/_packager/model_handlers/model_objective_utils.py +169 -0
snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +15 -8
snowflake/ml/model/_packager/model_handlers/sklearn.py +56 -60
snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +141 -9
snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
snowflake/ml/model/_packager/model_handlers/xgboost.py +63 -48
snowflake/ml/model/_packager/model_meta/model_meta.py +16 -42
snowflake/ml/model/_packager/model_meta/model_meta_schema.py +1 -14
snowflake/ml/model/_packager/model_packager.py +14 -8
snowflake/ml/model/_packager/model_runtime/model_runtime.py +11 -0
snowflake/ml/model/_signatures/pytorch_handler.py +1 -1
snowflake/ml/model/_signatures/snowpark_handler.py +3 -2
snowflake/ml/model/_signatures/utils.py +9 -0
snowflake/ml/model/type_hints.py +12 -145
snowflake/ml/modeling/_internal/constants.py +1 -0
snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +5 -5
snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +9 -6
snowflake/ml/modeling/_internal/model_specifications.py +2 -0
snowflake/ml/modeling/_internal/model_trainer.py +1 -0
snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -4
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +130 -166
snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +0 -1
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +61 -21
snowflake/ml/modeling/cluster/affinity_propagation.py +61 -21
snowflake/ml/modeling/cluster/agglomerative_clustering.py +61 -21
snowflake/ml/modeling/cluster/birch.py +61 -21
snowflake/ml/modeling/cluster/bisecting_k_means.py +61 -21
snowflake/ml/modeling/cluster/dbscan.py +61 -21
snowflake/ml/modeling/cluster/feature_agglomeration.py +61 -21
snowflake/ml/modeling/cluster/k_means.py +61 -21
snowflake/ml/modeling/cluster/mean_shift.py +61 -21
snowflake/ml/modeling/cluster/mini_batch_k_means.py +61 -21
snowflake/ml/modeling/cluster/optics.py +61 -21
snowflake/ml/modeling/cluster/spectral_biclustering.py +61 -21
snowflake/ml/modeling/cluster/spectral_clustering.py +61 -21
snowflake/ml/modeling/cluster/spectral_coclustering.py +61 -21
snowflake/ml/modeling/compose/column_transformer.py +61 -21
snowflake/ml/modeling/compose/transformed_target_regressor.py +61 -21
snowflake/ml/modeling/covariance/elliptic_envelope.py +61 -21
snowflake/ml/modeling/covariance/empirical_covariance.py +61 -21
snowflake/ml/modeling/covariance/graphical_lasso.py +61 -21
snowflake/ml/modeling/covariance/graphical_lasso_cv.py +61 -21
snowflake/ml/modeling/covariance/ledoit_wolf.py +61 -21
snowflake/ml/modeling/covariance/min_cov_det.py +61 -21
snowflake/ml/modeling/covariance/oas.py +61 -21
snowflake/ml/modeling/covariance/shrunk_covariance.py +61 -21
snowflake/ml/modeling/decomposition/dictionary_learning.py +61 -21
snowflake/ml/modeling/decomposition/factor_analysis.py +61 -21
snowflake/ml/modeling/decomposition/fast_ica.py +61 -21
snowflake/ml/modeling/decomposition/incremental_pca.py +61 -21
snowflake/ml/modeling/decomposition/kernel_pca.py +61 -21
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +61 -21
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +61 -21
snowflake/ml/modeling/decomposition/pca.py +61 -21
snowflake/ml/modeling/decomposition/sparse_pca.py +61 -21
snowflake/ml/modeling/decomposition/truncated_svd.py +61 -21
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +61 -21
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +61 -21
snowflake/ml/modeling/ensemble/ada_boost_classifier.py +61 -21
snowflake/ml/modeling/ensemble/ada_boost_regressor.py +61 -21
snowflake/ml/modeling/ensemble/bagging_classifier.py +61 -21
snowflake/ml/modeling/ensemble/bagging_regressor.py +61 -21
snowflake/ml/modeling/ensemble/extra_trees_classifier.py +61 -21
snowflake/ml/modeling/ensemble/extra_trees_regressor.py +61 -21
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +61 -21
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +61 -21
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +61 -21
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +61 -21
snowflake/ml/modeling/ensemble/isolation_forest.py +61 -21
snowflake/ml/modeling/ensemble/random_forest_classifier.py +61 -21
snowflake/ml/modeling/ensemble/random_forest_regressor.py +61 -21
snowflake/ml/modeling/ensemble/stacking_regressor.py +61 -21
snowflake/ml/modeling/ensemble/voting_classifier.py +61 -21
snowflake/ml/modeling/ensemble/voting_regressor.py +61 -21
snowflake/ml/modeling/feature_selection/generic_univariate_select.py +61 -21
snowflake/ml/modeling/feature_selection/select_fdr.py +61 -21
snowflake/ml/modeling/feature_selection/select_fpr.py +61 -21
snowflake/ml/modeling/feature_selection/select_fwe.py +61 -21
snowflake/ml/modeling/feature_selection/select_k_best.py +61 -21
snowflake/ml/modeling/feature_selection/select_percentile.py +61 -21
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +61 -21
snowflake/ml/modeling/feature_selection/variance_threshold.py +61 -21
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +61 -21
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +61 -21
snowflake/ml/modeling/impute/iterative_imputer.py +61 -21
snowflake/ml/modeling/impute/knn_imputer.py +61 -21
snowflake/ml/modeling/impute/missing_indicator.py +61 -21
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +61 -21
snowflake/ml/modeling/kernel_approximation/nystroem.py +61 -21
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +61 -21
snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +61 -21
snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +61 -21
snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +61 -21
snowflake/ml/modeling/lightgbm/lgbm_classifier.py +61 -21
snowflake/ml/modeling/lightgbm/lgbm_regressor.py +61 -21
snowflake/ml/modeling/linear_model/ard_regression.py +61 -21
snowflake/ml/modeling/linear_model/bayesian_ridge.py +61 -21
snowflake/ml/modeling/linear_model/elastic_net.py +61 -21
snowflake/ml/modeling/linear_model/elastic_net_cv.py +61 -21
snowflake/ml/modeling/linear_model/gamma_regressor.py +61 -21
snowflake/ml/modeling/linear_model/huber_regressor.py +61 -21
snowflake/ml/modeling/linear_model/lars.py +61 -21
snowflake/ml/modeling/linear_model/lars_cv.py +61 -21
snowflake/ml/modeling/linear_model/lasso.py +61 -21
snowflake/ml/modeling/linear_model/lasso_cv.py +61 -21
snowflake/ml/modeling/linear_model/lasso_lars.py +61 -21
snowflake/ml/modeling/linear_model/lasso_lars_cv.py +61 -21
snowflake/ml/modeling/linear_model/lasso_lars_ic.py +61 -21
snowflake/ml/modeling/linear_model/linear_regression.py +61 -21
snowflake/ml/modeling/linear_model/logistic_regression.py +61 -21
snowflake/ml/modeling/linear_model/logistic_regression_cv.py +61 -21
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +61 -21
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +61 -21
snowflake/ml/modeling/linear_model/multi_task_lasso.py +61 -21
snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +61 -21
snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +61 -21
snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +61 -21
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +61 -21
snowflake/ml/modeling/linear_model/perceptron.py +61 -21
snowflake/ml/modeling/linear_model/poisson_regressor.py +61 -21
snowflake/ml/modeling/linear_model/ransac_regressor.py +61 -21
snowflake/ml/modeling/linear_model/ridge.py +61 -21
snowflake/ml/modeling/linear_model/ridge_classifier.py +61 -21
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +61 -21
snowflake/ml/modeling/linear_model/ridge_cv.py +61 -21
snowflake/ml/modeling/linear_model/sgd_classifier.py +61 -21
snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +61 -21
snowflake/ml/modeling/linear_model/sgd_regressor.py +61 -21
snowflake/ml/modeling/linear_model/theil_sen_regressor.py +61 -21
snowflake/ml/modeling/linear_model/tweedie_regressor.py +61 -21
snowflake/ml/modeling/manifold/isomap.py +61 -21
snowflake/ml/modeling/manifold/mds.py +61 -21
snowflake/ml/modeling/manifold/spectral_embedding.py +61 -21
snowflake/ml/modeling/manifold/tsne.py +61 -21
snowflake/ml/modeling/metrics/metrics_utils.py +2 -2
snowflake/ml/modeling/metrics/ranking.py +0 -3
snowflake/ml/modeling/metrics/regression.py +0 -3
snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +61 -21
snowflake/ml/modeling/mixture/gaussian_mixture.py +61 -21
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +61 -21
snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +61 -21
snowflake/ml/modeling/multiclass/output_code_classifier.py +61 -21
snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +61 -21
snowflake/ml/modeling/naive_bayes/categorical_nb.py +61 -21
snowflake/ml/modeling/naive_bayes/complement_nb.py +61 -21
snowflake/ml/modeling/naive_bayes/gaussian_nb.py +61 -21
snowflake/ml/modeling/naive_bayes/multinomial_nb.py +61 -21
snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +61 -21
snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +61 -21
snowflake/ml/modeling/neighbors/kernel_density.py +61 -21
snowflake/ml/modeling/neighbors/local_outlier_factor.py +61 -21
snowflake/ml/modeling/neighbors/nearest_centroid.py +61 -21
snowflake/ml/modeling/neighbors/nearest_neighbors.py +61 -21
snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +61 -21
snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +61 -21
snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +61 -21
snowflake/ml/modeling/neural_network/bernoulli_rbm.py +61 -21
snowflake/ml/modeling/neural_network/mlp_classifier.py +61 -21
snowflake/ml/modeling/neural_network/mlp_regressor.py +61 -21
snowflake/ml/modeling/parameters/disable_model_tracer.py +5 -0
snowflake/ml/modeling/pipeline/pipeline.py +1 -13
snowflake/ml/modeling/preprocessing/polynomial_features.py +61 -21
snowflake/ml/modeling/semi_supervised/label_propagation.py +61 -21
snowflake/ml/modeling/semi_supervised/label_spreading.py +61 -21
snowflake/ml/modeling/svm/linear_svc.py +61 -21
snowflake/ml/modeling/svm/linear_svr.py +61 -21
snowflake/ml/modeling/svm/nu_svc.py +61 -21
snowflake/ml/modeling/svm/nu_svr.py +61 -21
snowflake/ml/modeling/svm/svc.py +61 -21
snowflake/ml/modeling/svm/svr.py +61 -21
snowflake/ml/modeling/tree/decision_tree_classifier.py +61 -21
snowflake/ml/modeling/tree/decision_tree_regressor.py +61 -21
snowflake/ml/modeling/tree/extra_tree_classifier.py +61 -21
snowflake/ml/modeling/tree/extra_tree_regressor.py +61 -21
snowflake/ml/modeling/xgboost/xgb_classifier.py +64 -23
snowflake/ml/modeling/xgboost/xgb_regressor.py +64 -23
snowflake/ml/modeling/xgboost/xgbrf_classifier.py +64 -23
snowflake/ml/modeling/xgboost/xgbrf_regressor.py +64 -23
snowflake/ml/monitoring/_client/model_monitor.py +126 -0
snowflake/ml/monitoring/_client/model_monitor_manager.py +361 -0
snowflake/ml/monitoring/_client/model_monitor_version.py +1 -0
snowflake/ml/monitoring/_client/monitor_sql_client.py +1335 -0
snowflake/ml/monitoring/_client/queries/record_count.ssql +14 -0
snowflake/ml/monitoring/_client/queries/rmse.ssql +28 -0
snowflake/ml/monitoring/entities/model_monitor_config.py +28 -0
snowflake/ml/monitoring/entities/model_monitor_interval.py +46 -0
snowflake/ml/monitoring/entities/output_score_type.py +90 -0
snowflake/ml/registry/_manager/model_manager.py +4 -0
snowflake/ml/registry/registry.py +166 -8
snowflake/ml/version.py +1 -1
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/METADATA +43 -9
snowflake_ml_python-1.6.3.dist-info/RECORD +400 -0
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/WHEEL +1 -1
snowflake/ml/_internal/container_services/image_registry/credential.py +0 -84
snowflake/ml/_internal/container_services/image_registry/http_client.py +0 -127
snowflake/ml/_internal/container_services/image_registry/imagelib.py +0 -400
snowflake/ml/_internal/container_services/image_registry/registry_client.py +0 -212
snowflake/ml/_internal/utils/log_stream_processor.py +0 -30
snowflake/ml/_internal/utils/session_token_manager.py +0 -46
snowflake/ml/_internal/utils/spcs_attribution_utils.py +0 -122
snowflake/ml/_internal/utils/uri.py +0 -77
snowflake/ml/data/torch_dataset.py +0 -33
snowflake/ml/model/_api.py +0 -568
snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +0 -12
snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +0 -249
snowflake/ml/model/_deploy_client/image_builds/docker_context.py +0 -130
snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +0 -36
snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +0 -268
snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +0 -215
snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +0 -53
snowflake/ml/model/_deploy_client/image_builds/templates/image_build_job_spec_template +0 -38
snowflake/ml/model/_deploy_client/image_builds/templates/kaniko_shell_script_template +0 -105
snowflake/ml/model/_deploy_client/snowservice/deploy.py +0 -611
snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +0 -116
snowflake/ml/model/_deploy_client/snowservice/instance_types.py +0 -10
snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +0 -28
snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template_with_model +0 -21
snowflake/ml/model/_deploy_client/utils/constants.py +0 -48
snowflake/ml/model/_deploy_client/utils/snowservice_client.py +0 -280
snowflake/ml/model/_deploy_client/warehouse/deploy.py +0 -202
snowflake/ml/model/_deploy_client/warehouse/infer_template.py +0 -99
snowflake/ml/model/_packager/model_handlers/llm.py +0 -267
snowflake/ml/model/_packager/model_meta/_core_requirements.py +0 -11
snowflake/ml/model/deploy_platforms.py +0 -6
snowflake/ml/model/models/llm.py +0 -104
snowflake/ml/monitoring/monitor.py +0 -203
snowflake/ml/registry/_initial_schema.py +0 -142
snowflake/ml/registry/_schema.py +0 -82
snowflake/ml/registry/_schema_upgrade_plans.py +0 -116
snowflake/ml/registry/_schema_version_manager.py +0 -163
snowflake/ml/registry/model_registry.py +0 -2048
snowflake_ml_python-1.6.1.dist-info/RECORD +0 -422
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/LICENSE.txt +0 -0
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/top_level.txt +0 -0

snowflake/ml/modeling/xgboost/xgbrf_classifier.py CHANGED Viewed

@@ -4,18 +4,17 @@
 #
 import inspect
 import os
-import posixpath
-from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
-from typing_extensions import TypeGuard
+from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
 from uuid import uuid4
 import cloudpickle as cp
-import pandas as pd
 import numpy as np
+import pandas as pd
 from numpy import typing as npt
 import numpy
+import sklearn
 import xgboost
 from sklearn.utils.metaestimators import available_if
@@ -23,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
 from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
 from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
-from snowflake.ml._internal.utils import pkg_version_utils, identifier
+from snowflake.ml._internal.utils import identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.transformer_protocols import (
-    ModelTransformHandlers,
     BatchInferenceKwargsTypedDict,
     ScoreKwargsTypedDict
 )
@@ -363,7 +361,7 @@ class XGBRFClassifier(BaseTransformer):
         self.set_sample_weight_col(sample_weight_col)
         self._use_external_memory_version = use_external_memory_version
         self._batch_size = batch_size
-        deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
+        deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
         self._deps = list(deps)
@@ -699,12 +697,23 @@ class XGBRFClassifier(BaseTransformer):
             autogenerated=self._autogenerated,
             subproject=_SUBPROJECT,
         )
-        output_result, fitted_estimator = model_trainer.train_fit_predict(
-            drop_input_cols=self._drop_input_cols,
-            expected_output_cols_list=(
-                self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
-            ),
+        expected_output_cols = (
+            self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
         )
+        if isinstance(dataset, DataFrame):
+            expected_output_cols, example_output_pd_df = self._align_expected_output(
+                "fit_predict", dataset, expected_output_cols, output_cols_prefix
+            )
+            output_result, fitted_estimator = model_trainer.train_fit_predict(
+                drop_input_cols=self._drop_input_cols,
+                expected_output_cols_list=expected_output_cols,
+                example_output_pd_df=example_output_pd_df,
+            )
+        else:
+            output_result, fitted_estimator = model_trainer.train_fit_predict(
+                drop_input_cols=self._drop_input_cols,
+                expected_output_cols_list=expected_output_cols,
+            )
         self._sklearn_object = fitted_estimator
         self._is_fitted = True
         return output_result
@@ -727,6 +736,7 @@ class XGBRFClassifier(BaseTransformer):
         """
         self._infer_input_output_cols(dataset)
         super()._check_dataset_type(dataset)
         model_trainer = ModelTrainerBuilder.build_fit_transform(
             estimator=self._sklearn_object,
             dataset=dataset,
@@ -783,12 +793,41 @@ class XGBRFClassifier(BaseTransformer):
         return rv
-    def _align_expected_output_names(
-        self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
-    ) -> List[str]:
+    def _align_expected_output(
+        self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
+    ) -> Tuple[List[str], pd.DataFrame]:
+        """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
+        and output dataframe with 1 line.
+        If the method is fit_predict, run 2 lines of data.
+        """
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
+        # so change the minimum of number of rows to 2
+        num_examples = 2
+        statement_params = telemetry.get_function_usage_statement_params(
+            project=_PROJECT,
+            subproject=_SUBPROJECT,
+            function_name=telemetry.get_statement_params_full_func_name(
+                inspect.currentframe(), XGBRFClassifier.__class__.__name__
+            ),
+            api_calls=[Session.call],
+            custom_tags={"autogen": True} if self._autogenerated else None,
+        )
+        if output_cols_prefix == "fit_predict_":
+            if hasattr(self._sklearn_object, "n_clusters"):
+                # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
+                num_examples = self._sklearn_object.n_clusters
+            elif hasattr(self._sklearn_object, "min_samples"):
+                # OPTICS default min_samples 5, which requires at least 5 lines of data
+                num_examples = self._sklearn_object.min_samples
+            elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
+                # LocalOutlierFactor expects n_neighbors <= n_samples
+                num_examples = self._sklearn_object.n_neighbors
+            sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
+        else:
+            sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
         # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
         # seen during the fit.
@@ -800,12 +839,14 @@ class XGBRFClassifier(BaseTransformer):
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
             output_df_columns_set -= set(self.sample_weight_col)
         # if the dimension of inferred output column names is correct; use it
         if len(expected_output_cols_list) == len(output_df_columns_set):
-            return expected_output_cols_list
+            return expected_output_cols_list, output_df_pd
         # otherwise, use the sklearn estimator's output
         else:
-            return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
+            expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
+            return expected_output_cols_list, output_df_pd[expected_output_cols_list]
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -853,7 +894,7 @@ class XGBRFClassifier(BaseTransformer):
                 drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
-            expected_output_cols = self._align_expected_output_names(
+            expected_output_cols, _ = self._align_expected_output(
                 inference_method, dataset, expected_output_cols, output_cols_prefix
             )
@@ -921,7 +962,7 @@ class XGBRFClassifier(BaseTransformer):
                 drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
-            expected_output_cols = self._align_expected_output_names(
+            expected_output_cols, _ = self._align_expected_output(
                 inference_method, dataset, expected_output_cols, output_cols_prefix
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -984,7 +1025,7 @@ class XGBRFClassifier(BaseTransformer):
                 drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
-            expected_output_cols = self._align_expected_output_names(
+            expected_output_cols, _ = self._align_expected_output(
                 inference_method, dataset, expected_output_cols, output_cols_prefix
             )
@@ -1049,7 +1090,7 @@ class XGBRFClassifier(BaseTransformer):
                 drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
-            expected_output_cols = self._align_expected_output_names(
+            expected_output_cols, _ = self._align_expected_output(
                 inference_method, dataset, expected_output_cols, output_cols_prefix
             )
@@ -1114,7 +1155,7 @@ class XGBRFClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                score_sproc_imports=['xgboost'],
+                score_sproc_imports=['xgboost', 'sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
             # pandas_handler.score() does not require any extra kwargs.

snowflake/ml/modeling/xgboost/xgbrf_regressor.py CHANGED Viewed

@@ -4,18 +4,17 @@
 #
 import inspect
 import os
-import posixpath
-from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
-from typing_extensions import TypeGuard
+from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
 from uuid import uuid4
 import cloudpickle as cp
-import pandas as pd
 import numpy as np
+import pandas as pd
 from numpy import typing as npt
 import numpy
+import sklearn
 import xgboost
 from sklearn.utils.metaestimators import available_if
@@ -23,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
 from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
 from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
-from snowflake.ml._internal.utils import pkg_version_utils, identifier
+from snowflake.ml._internal.utils import identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.transformer_protocols import (
-    ModelTransformHandlers,
     BatchInferenceKwargsTypedDict,
     ScoreKwargsTypedDict
 )
@@ -363,7 +361,7 @@ class XGBRFRegressor(BaseTransformer):
         self.set_sample_weight_col(sample_weight_col)
         self._use_external_memory_version = use_external_memory_version
         self._batch_size = batch_size
-        deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
+        deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
         self._deps = list(deps)
@@ -699,12 +697,23 @@ class XGBRFRegressor(BaseTransformer):
             autogenerated=self._autogenerated,
             subproject=_SUBPROJECT,
         )
-        output_result, fitted_estimator = model_trainer.train_fit_predict(
-            drop_input_cols=self._drop_input_cols,
-            expected_output_cols_list=(
-                self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
-            ),
+        expected_output_cols = (
+            self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
         )
+        if isinstance(dataset, DataFrame):
+            expected_output_cols, example_output_pd_df = self._align_expected_output(
+                "fit_predict", dataset, expected_output_cols, output_cols_prefix
+            )
+            output_result, fitted_estimator = model_trainer.train_fit_predict(
+                drop_input_cols=self._drop_input_cols,
+                expected_output_cols_list=expected_output_cols,
+                example_output_pd_df=example_output_pd_df,
+            )
+        else:
+            output_result, fitted_estimator = model_trainer.train_fit_predict(
+                drop_input_cols=self._drop_input_cols,
+                expected_output_cols_list=expected_output_cols,
+            )
         self._sklearn_object = fitted_estimator
         self._is_fitted = True
         return output_result
@@ -727,6 +736,7 @@ class XGBRFRegressor(BaseTransformer):
         """
         self._infer_input_output_cols(dataset)
         super()._check_dataset_type(dataset)
         model_trainer = ModelTrainerBuilder.build_fit_transform(
             estimator=self._sklearn_object,
             dataset=dataset,
@@ -783,12 +793,41 @@ class XGBRFRegressor(BaseTransformer):
         return rv
-    def _align_expected_output_names(
-        self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
-    ) -> List[str]:
+    def _align_expected_output(
+        self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
+    ) -> Tuple[List[str], pd.DataFrame]:
+        """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
+        and output dataframe with 1 line.
+        If the method is fit_predict, run 2 lines of data.
+        """
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
+        # so change the minimum of number of rows to 2
+        num_examples = 2
+        statement_params = telemetry.get_function_usage_statement_params(
+            project=_PROJECT,
+            subproject=_SUBPROJECT,
+            function_name=telemetry.get_statement_params_full_func_name(
+                inspect.currentframe(), XGBRFRegressor.__class__.__name__
+            ),
+            api_calls=[Session.call],
+            custom_tags={"autogen": True} if self._autogenerated else None,
+        )
+        if output_cols_prefix == "fit_predict_":
+            if hasattr(self._sklearn_object, "n_clusters"):
+                # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
+                num_examples = self._sklearn_object.n_clusters
+            elif hasattr(self._sklearn_object, "min_samples"):
+                # OPTICS default min_samples 5, which requires at least 5 lines of data
+                num_examples = self._sklearn_object.min_samples
+            elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
+                # LocalOutlierFactor expects n_neighbors <= n_samples
+                num_examples = self._sklearn_object.n_neighbors
+            sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
+        else:
+            sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
         # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
         # seen during the fit.
@@ -800,12 +839,14 @@ class XGBRFRegressor(BaseTransformer):
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
             output_df_columns_set -= set(self.sample_weight_col)
         # if the dimension of inferred output column names is correct; use it
         if len(expected_output_cols_list) == len(output_df_columns_set):
-            return expected_output_cols_list
+            return expected_output_cols_list, output_df_pd
         # otherwise, use the sklearn estimator's output
         else:
-            return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
+            expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
+            return expected_output_cols_list, output_df_pd[expected_output_cols_list]
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -851,7 +892,7 @@ class XGBRFRegressor(BaseTransformer):
                 drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
-            expected_output_cols = self._align_expected_output_names(
+            expected_output_cols, _ = self._align_expected_output(
                 inference_method, dataset, expected_output_cols, output_cols_prefix
             )
@@ -917,7 +958,7 @@ class XGBRFRegressor(BaseTransformer):
                 drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
-            expected_output_cols = self._align_expected_output_names(
+            expected_output_cols, _ = self._align_expected_output(
                 inference_method, dataset, expected_output_cols, output_cols_prefix
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -980,7 +1021,7 @@ class XGBRFRegressor(BaseTransformer):
                 drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
-            expected_output_cols = self._align_expected_output_names(
+            expected_output_cols, _ = self._align_expected_output(
                 inference_method, dataset, expected_output_cols, output_cols_prefix
             )
@@ -1045,7 +1086,7 @@ class XGBRFRegressor(BaseTransformer):
                 drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
-            expected_output_cols = self._align_expected_output_names(
+            expected_output_cols, _ = self._align_expected_output(
                 inference_method, dataset, expected_output_cols, output_cols_prefix
             )
@@ -1110,7 +1151,7 @@ class XGBRFRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                score_sproc_imports=['xgboost'],
+                score_sproc_imports=['xgboost', 'sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
             # pandas_handler.score() does not require any extra kwargs.

snowflake/ml/monitoring/_client/model_monitor.py ADDED Viewed

@@ -0,0 +1,126 @@
+from typing import List, Union
+import pandas as pd
+from snowflake import snowpark
+from snowflake.ml._internal import telemetry
+from snowflake.ml._internal.utils import sql_identifier
+from snowflake.ml.monitoring._client import monitor_sql_client
+class ModelMonitor:
+    """Class to manage instrumentation of Model Monitoring and Observability.
+    Instances are not created through the initializer (it always raises); they are built by the
+    ``_ref`` classmethod, which populates every attribute declared below.
+    """
+    name: sql_identifier.SqlIdentifier
+    # Client that set_baseline, suspend and resume delegate their work to.
+    _model_monitor_client: monitor_sql_client._ModelMonitorSQLClient
+    # Passed to the client as-is by set_baseline; suspend/resume split it and use only its last part.
+    _fully_qualified_model_name: str
+    _version_name: sql_identifier.SqlIdentifier
+    _function_name: sql_identifier.SqlIdentifier
+    # Columns that set_baseline requires to be present in the baseline dataframe.
+    _prediction_columns: List[sql_identifier.SqlIdentifier]
+    _label_columns: List[sql_identifier.SqlIdentifier]
+    def __init__(self) -> None:
+        """Always raise: ModelMonitor objects are constructed through ``_ref`` instead.
+        Raises:
+            RuntimeError: Unconditionally.
+        """
+        raise RuntimeError("ModelMonitor's initializer is not meant to be used.")
+    @classmethod
+    def _ref(
+        cls,
+        model_monitor_client: monitor_sql_client._ModelMonitorSQLClient,
+        name: sql_identifier.SqlIdentifier,
+        *,
+        fully_qualified_model_name: str,
+        version_name: sql_identifier.SqlIdentifier,
+        function_name: sql_identifier.SqlIdentifier,
+        prediction_columns: List[sql_identifier.SqlIdentifier],
+        label_columns: List[sql_identifier.SqlIdentifier],
+    ) -> "ModelMonitor":
+        """Build a ModelMonitor without invoking ``__init__`` and store the given values on it.
+        Args:
+            model_monitor_client: Client stored as ``_model_monitor_client``.
+            name: Stored as ``name``.
+            fully_qualified_model_name: Stored as ``_fully_qualified_model_name``.
+            version_name: Stored as ``_version_name``.
+            function_name: Stored as ``_function_name``.
+            prediction_columns: Stored as ``_prediction_columns``.
+            label_columns: Stored as ``_label_columns``.
+        Returns:
+            The newly created ModelMonitor instance.
+        """
+        # object.__new__ bypasses __init__, which unconditionally raises.
+        self: "ModelMonitor" = object.__new__(cls)
+        self.name = name
+        self._model_monitor_client = model_monitor_client
+        self._fully_qualified_model_name = fully_qualified_model_name
+        self._version_name = version_name
+        self._function_name = function_name
+        self._prediction_columns = prediction_columns
+        self._label_columns = label_columns
+        return self
+    @telemetry.send_api_usage_telemetry(
+        project=telemetry.TelemetryProject.MLOPS.value,
+        subproject=telemetry.TelemetrySubProject.MONITORING.value,
+    )
+    def set_baseline(self, baseline_df: Union[pd.DataFrame, snowpark.DataFrame]) -> None:
+        """
+        The baseline dataframe is compared with the monitored data once monitoring is enabled.
+        The columns of the dataframe should match the columns of the source table that the
+        ModelMonitor was configured with. Calling this method overwrites any existing baseline split data.
+        Args:
+            baseline_df: Pandas or Snowpark dataframe containing baseline data. A pandas dataframe is
+                first converted to a Snowpark dataframe using the monitor client's session.
+        Raises:
+            ValueError: baseline_df does not contain prediction or label columns
+        """
+        statement_params = telemetry.get_statement_params(
+            project=telemetry.TelemetryProject.MLOPS.value,
+            subproject=telemetry.TelemetrySubProject.MONITORING.value,
+        )
+        if isinstance(baseline_df, pd.DataFrame):
+            baseline_df = self._model_monitor_client._sql_client._session.create_dataframe(baseline_df)
+        column_names_identifiers: List[sql_identifier.SqlIdentifier] = [
+            sql_identifier.SqlIdentifier(column_name) for column_name in baseline_df.columns
+        ]
+        # Generators let any() stop at the first missing column without building a temporary list.
+        prediction_cols_not_found = any(
+            prediction_col not in column_names_identifiers for prediction_col in self._prediction_columns
+        )
+        # NOTE(review): labels are compared via .identifier() while predictions are compared as
+        # SqlIdentifier objects -- confirm both forms are meant to match equivalently.
+        label_cols_not_found = any(
+            label_col.identifier() not in column_names_identifiers for label_col in self._label_columns
+        )
+        if prediction_cols_not_found:
+            raise ValueError(
+                "Specified prediction columns were not found in the baseline dataframe. "
+                f"Columns provided were: {column_names_identifiers}. "
+                f"Configured prediction columns were: {self._prediction_columns}."
+            )
+        if label_cols_not_found:
+            # Trailing spaces keep the implicitly concatenated sentences from running together.
+            raise ValueError(
+                "Specified label columns were not found in the baseline dataframe. "
+                f"Columns provided in the baseline dataframe were: {column_names_identifiers}. "
+                f"Configured label columns were: {self._label_columns}."
+            )
+        # Create the table by materializing the df
+        self._model_monitor_client.materialize_baseline_dataframe(
+            baseline_df,
+            self._fully_qualified_model_name,
+            self._version_name,
+            statement_params=statement_params,
+        )
+    def suspend(self) -> None:
+        """Suspend pipeline for ModelMonitor.
+        Delegates to the monitor client's ``suspend_monitor_dynamic_tables`` for this monitor's
+        model name and version.
+        """
+        statement_params = telemetry.get_statement_params(
+            project=telemetry.TelemetryProject.MLOPS.value,
+            subproject=telemetry.TelemetrySubProject.MONITORING.value,
+        )
+        # Only the last component of the parsed fully qualified name is passed on.
+        _, _, model_name = sql_identifier.parse_fully_qualified_name(self._fully_qualified_model_name)
+        self._model_monitor_client.suspend_monitor_dynamic_tables(
+            model_name=model_name,
+            version_name=self._version_name,
+            statement_params=statement_params,
+        )
+    def resume(self) -> None:
+        """Resume pipeline for ModelMonitor.
+        Delegates to the monitor client's ``resume_monitor_dynamic_tables`` for this monitor's
+        model name and version.
+        """
+        statement_params = telemetry.get_statement_params(
+            project=telemetry.TelemetryProject.MLOPS.value,
+            subproject=telemetry.TelemetrySubProject.MONITORING.value,
+        )
+        # Only the last component of the parsed fully qualified name is passed on.
+        _, _, model_name = sql_identifier.parse_fully_qualified_name(self._fully_qualified_model_name)
+        self._model_monitor_client.resume_monitor_dynamic_tables(
+            model_name=model_name,
+            version_name=self._version_name,
+            statement_params=statement_params,
+        )

snowflake-ml-python 1.6.1__py3-none-any.whl → 1.6.3__py3-none-any.whl

snowflake-ml-python 1.6.1py3-none-any.whl → 1.6.3py3-none-any.whl