snowflake-ml-python 1.6.0__py3-none-any.whl → 1.6.2__py3-none-any.whl
This diff compares the contents of two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +7 -33
- snowflake/ml/_internal/env_utils.py +11 -5
- snowflake/ml/_internal/exceptions/modeling_error_messages.py +4 -1
- snowflake/ml/_internal/telemetry.py +156 -20
- snowflake/ml/_internal/utils/identifier.py +48 -11
- snowflake/ml/_internal/utils/pkg_version_utils.py +8 -22
- snowflake/ml/_internal/utils/snowflake_env.py +23 -13
- snowflake/ml/_internal/utils/sql_identifier.py +1 -1
- snowflake/ml/_internal/utils/table_manager.py +19 -1
- snowflake/ml/_internal/utils/uri.py +2 -2
- snowflake/ml/data/_internal/arrow_ingestor.py +66 -10
- snowflake/ml/data/data_connector.py +88 -9
- snowflake/ml/data/data_ingestor.py +18 -1
- snowflake/ml/data/{_internal/ingestor_utils.py → ingestor_utils.py} +5 -1
- snowflake/ml/data/torch_utils.py +68 -0
- snowflake/ml/dataset/dataset.py +1 -3
- snowflake/ml/dataset/dataset_metadata.py +3 -1
- snowflake/ml/dataset/dataset_reader.py +9 -3
- snowflake/ml/feature_store/examples/airline_features/entities.py +16 -0
- snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +31 -0
- snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +42 -0
- snowflake/ml/feature_store/examples/airline_features/source.yaml +7 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +10 -4
- snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +6 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/source.yaml +3 -0
- snowflake/ml/feature_store/examples/example_helper.py +69 -31
- snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +3 -3
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/{dropoff_features.py → location_features.py} +14 -9
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +36 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/source.yaml +5 -1
- snowflake/ml/feature_store/examples/source_data/airline.yaml +4 -0
- snowflake/ml/feature_store/examples/source_data/citibike_trips.yaml +1 -1
- snowflake/ml/feature_store/examples/wine_quality_features/entities.py +3 -3
- snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +13 -6
- snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +8 -5
- snowflake/ml/feature_store/examples/wine_quality_features/source.yaml +3 -0
- snowflake/ml/feature_store/feature_store.py +100 -41
- snowflake/ml/feature_store/feature_view.py +149 -5
- snowflake/ml/fileset/embedded_stage_fs.py +1 -1
- snowflake/ml/fileset/fileset.py +1 -1
- snowflake/ml/fileset/sfcfs.py +9 -3
- snowflake/ml/model/_client/model/model_impl.py +11 -2
- snowflake/ml/model/_client/model/model_version_impl.py +186 -20
- snowflake/ml/model/_client/ops/model_ops.py +144 -30
- snowflake/ml/model/_client/ops/service_ops.py +312 -0
- snowflake/ml/model/_client/service/model_deployment_spec.py +94 -0
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +30 -0
- snowflake/ml/model/_client/sql/model_version.py +13 -4
- snowflake/ml/model/_client/sql/service.py +196 -0
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +1 -1
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +3 -3
- snowflake/ml/model/_model_composer/model_composer.py +5 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +13 -10
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +3 -0
- snowflake/ml/model/_packager/model_env/model_env.py +7 -2
- snowflake/ml/model/_packager/model_handlers/_base.py +29 -12
- snowflake/ml/model/_packager/model_handlers/_utils.py +46 -14
- snowflake/ml/model/_packager/model_handlers/catboost.py +25 -16
- snowflake/ml/model/_packager/model_handlers/custom.py +6 -2
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +32 -20
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +23 -56
- snowflake/ml/model/_packager/model_handlers/llm.py +11 -5
- snowflake/ml/model/_packager/model_handlers/mlflow.py +8 -3
- snowflake/ml/model/_packager/model_handlers/model_objective_utils.py +116 -0
- snowflake/ml/model/_packager/model_handlers/pytorch.py +8 -3
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +8 -3
- snowflake/ml/model/_packager/model_handlers/sklearn.py +99 -4
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +123 -5
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +9 -4
- snowflake/ml/model/_packager/model_handlers/torchscript.py +10 -5
- snowflake/ml/model/_packager/model_handlers/xgboost.py +56 -47
- snowflake/ml/model/_packager/model_meta/model_meta.py +35 -2
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +11 -0
- snowflake/ml/model/_packager/model_packager.py +4 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -2
- snowflake/ml/model/_signatures/pytorch_handler.py +1 -1
- snowflake/ml/model/_signatures/utils.py +9 -0
- snowflake/ml/model/models/llm.py +3 -1
- snowflake/ml/model/type_hints.py +10 -4
- snowflake/ml/modeling/_internal/constants.py +1 -0
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +5 -5
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +9 -6
- snowflake/ml/modeling/_internal/model_specifications.py +2 -0
- snowflake/ml/modeling/_internal/model_trainer.py +1 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +113 -160
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +60 -21
- snowflake/ml/modeling/cluster/affinity_propagation.py +60 -21
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +60 -21
- snowflake/ml/modeling/cluster/birch.py +60 -21
- snowflake/ml/modeling/cluster/bisecting_k_means.py +60 -21
- snowflake/ml/modeling/cluster/dbscan.py +60 -21
- snowflake/ml/modeling/cluster/feature_agglomeration.py +60 -21
- snowflake/ml/modeling/cluster/k_means.py +60 -21
- snowflake/ml/modeling/cluster/mean_shift.py +60 -21
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +60 -21
- snowflake/ml/modeling/cluster/optics.py +60 -21
- snowflake/ml/modeling/cluster/spectral_biclustering.py +60 -21
- snowflake/ml/modeling/cluster/spectral_clustering.py +60 -21
- snowflake/ml/modeling/cluster/spectral_coclustering.py +60 -21
- snowflake/ml/modeling/compose/column_transformer.py +60 -21
- snowflake/ml/modeling/compose/transformed_target_regressor.py +60 -21
- snowflake/ml/modeling/covariance/elliptic_envelope.py +60 -21
- snowflake/ml/modeling/covariance/empirical_covariance.py +60 -21
- snowflake/ml/modeling/covariance/graphical_lasso.py +60 -21
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +60 -21
- snowflake/ml/modeling/covariance/ledoit_wolf.py +60 -21
- snowflake/ml/modeling/covariance/min_cov_det.py +60 -21
- snowflake/ml/modeling/covariance/oas.py +60 -21
- snowflake/ml/modeling/covariance/shrunk_covariance.py +60 -21
- snowflake/ml/modeling/decomposition/dictionary_learning.py +60 -21
- snowflake/ml/modeling/decomposition/factor_analysis.py +60 -21
- snowflake/ml/modeling/decomposition/fast_ica.py +60 -21
- snowflake/ml/modeling/decomposition/incremental_pca.py +60 -21
- snowflake/ml/modeling/decomposition/kernel_pca.py +60 -21
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +60 -21
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +60 -21
- snowflake/ml/modeling/decomposition/pca.py +60 -21
- snowflake/ml/modeling/decomposition/sparse_pca.py +60 -21
- snowflake/ml/modeling/decomposition/truncated_svd.py +60 -21
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +60 -21
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +60 -21
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +60 -21
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +60 -21
- snowflake/ml/modeling/ensemble/bagging_classifier.py +60 -21
- snowflake/ml/modeling/ensemble/bagging_regressor.py +60 -21
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +60 -21
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +60 -21
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +60 -21
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +60 -21
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +60 -21
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +60 -21
- snowflake/ml/modeling/ensemble/isolation_forest.py +60 -21
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +60 -21
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +60 -21
- snowflake/ml/modeling/ensemble/stacking_regressor.py +60 -21
- snowflake/ml/modeling/ensemble/voting_classifier.py +60 -21
- snowflake/ml/modeling/ensemble/voting_regressor.py +60 -21
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +60 -21
- snowflake/ml/modeling/feature_selection/select_fdr.py +60 -21
- snowflake/ml/modeling/feature_selection/select_fpr.py +60 -21
- snowflake/ml/modeling/feature_selection/select_fwe.py +60 -21
- snowflake/ml/modeling/feature_selection/select_k_best.py +60 -21
- snowflake/ml/modeling/feature_selection/select_percentile.py +60 -21
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +60 -21
- snowflake/ml/modeling/feature_selection/variance_threshold.py +60 -21
- snowflake/ml/modeling/framework/base.py +28 -19
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +60 -21
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +60 -21
- snowflake/ml/modeling/impute/iterative_imputer.py +60 -21
- snowflake/ml/modeling/impute/knn_imputer.py +60 -21
- snowflake/ml/modeling/impute/missing_indicator.py +60 -21
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +60 -21
- snowflake/ml/modeling/kernel_approximation/nystroem.py +60 -21
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +60 -21
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +60 -21
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +60 -21
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +60 -21
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +60 -21
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +60 -21
- snowflake/ml/modeling/linear_model/ard_regression.py +60 -21
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +60 -21
- snowflake/ml/modeling/linear_model/elastic_net.py +60 -21
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +60 -21
- snowflake/ml/modeling/linear_model/gamma_regressor.py +60 -21
- snowflake/ml/modeling/linear_model/huber_regressor.py +60 -21
- snowflake/ml/modeling/linear_model/lars.py +60 -21
- snowflake/ml/modeling/linear_model/lars_cv.py +60 -21
- snowflake/ml/modeling/linear_model/lasso.py +60 -21
- snowflake/ml/modeling/linear_model/lasso_cv.py +60 -21
- snowflake/ml/modeling/linear_model/lasso_lars.py +60 -21
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +60 -21
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +60 -21
- snowflake/ml/modeling/linear_model/linear_regression.py +60 -21
- snowflake/ml/modeling/linear_model/logistic_regression.py +60 -21
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +60 -21
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +60 -21
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +60 -21
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +60 -21
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +60 -21
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +60 -21
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +60 -21
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +60 -21
- snowflake/ml/modeling/linear_model/perceptron.py +60 -21
- snowflake/ml/modeling/linear_model/poisson_regressor.py +60 -21
- snowflake/ml/modeling/linear_model/ransac_regressor.py +60 -21
- snowflake/ml/modeling/linear_model/ridge.py +60 -21
- snowflake/ml/modeling/linear_model/ridge_classifier.py +60 -21
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +60 -21
- snowflake/ml/modeling/linear_model/ridge_cv.py +60 -21
- snowflake/ml/modeling/linear_model/sgd_classifier.py +60 -21
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +60 -21
- snowflake/ml/modeling/linear_model/sgd_regressor.py +60 -21
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +60 -21
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +60 -21
- snowflake/ml/modeling/manifold/isomap.py +60 -21
- snowflake/ml/modeling/manifold/mds.py +60 -21
- snowflake/ml/modeling/manifold/spectral_embedding.py +60 -21
- snowflake/ml/modeling/manifold/tsne.py +60 -21
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +60 -21
- snowflake/ml/modeling/mixture/gaussian_mixture.py +60 -21
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +60 -21
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +60 -21
- snowflake/ml/modeling/multiclass/output_code_classifier.py +60 -21
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +60 -21
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +60 -21
- snowflake/ml/modeling/naive_bayes/complement_nb.py +60 -21
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +60 -21
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +60 -21
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +60 -21
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +60 -21
- snowflake/ml/modeling/neighbors/kernel_density.py +60 -21
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +60 -21
- snowflake/ml/modeling/neighbors/nearest_centroid.py +60 -21
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +60 -21
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +60 -21
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +60 -21
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +60 -21
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +60 -21
- snowflake/ml/modeling/neural_network/mlp_classifier.py +60 -21
- snowflake/ml/modeling/neural_network/mlp_regressor.py +60 -21
- snowflake/ml/modeling/parameters/disable_model_tracer.py +5 -0
- snowflake/ml/modeling/pipeline/pipeline.py +4 -12
- snowflake/ml/modeling/preprocessing/polynomial_features.py +60 -21
- snowflake/ml/modeling/semi_supervised/label_propagation.py +60 -21
- snowflake/ml/modeling/semi_supervised/label_spreading.py +60 -21
- snowflake/ml/modeling/svm/linear_svc.py +60 -21
- snowflake/ml/modeling/svm/linear_svr.py +60 -21
- snowflake/ml/modeling/svm/nu_svc.py +60 -21
- snowflake/ml/modeling/svm/nu_svr.py +60 -21
- snowflake/ml/modeling/svm/svc.py +60 -21
- snowflake/ml/modeling/svm/svr.py +60 -21
- snowflake/ml/modeling/tree/decision_tree_classifier.py +60 -21
- snowflake/ml/modeling/tree/decision_tree_regressor.py +60 -21
- snowflake/ml/modeling/tree/extra_tree_classifier.py +60 -21
- snowflake/ml/modeling/tree/extra_tree_regressor.py +60 -21
- snowflake/ml/modeling/xgboost/xgb_classifier.py +63 -23
- snowflake/ml/modeling/xgboost/xgb_regressor.py +63 -23
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +63 -23
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +63 -23
- snowflake/ml/registry/_manager/model_manager.py +20 -2
- snowflake/ml/registry/model_registry.py +1 -1
- snowflake/ml/registry/registry.py +1 -2
- snowflake/ml/utils/sql_client.py +22 -0
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.2.dist-info}/METADATA +55 -3
- {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.2.dist-info}/RECORD +251 -238
- {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.2.dist-info}/WHEEL +1 -1
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/pickup_features.py +0 -58
- {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.2.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.2.dist-info}/top_level.txt +0 -0
@@ -4,18 +4,17 @@
|
|
4
4
|
#
|
5
5
|
import inspect
|
6
6
|
import os
|
7
|
-
import
|
8
|
-
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
9
|
-
from typing_extensions import TypeGuard
|
7
|
+
from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
|
10
8
|
from uuid import uuid4
|
11
9
|
|
12
10
|
import cloudpickle as cp
|
13
|
-
import pandas as pd
|
14
11
|
import numpy as np
|
12
|
+
import pandas as pd
|
15
13
|
from numpy import typing as npt
|
16
14
|
|
17
15
|
|
18
16
|
import numpy
|
17
|
+
import sklearn
|
19
18
|
import xgboost
|
20
19
|
from sklearn.utils.metaestimators import available_if
|
21
20
|
|
@@ -23,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
|
|
23
22
|
from snowflake.ml._internal import telemetry
|
24
23
|
from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
|
25
24
|
from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
|
26
|
-
from snowflake.ml._internal.utils import
|
25
|
+
from snowflake.ml._internal.utils import identifier
|
27
26
|
from snowflake.snowpark import DataFrame, Session
|
28
27
|
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
29
28
|
from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
|
30
29
|
from snowflake.ml.modeling._internal.transformer_protocols import (
|
31
|
-
ModelTransformHandlers,
|
32
30
|
BatchInferenceKwargsTypedDict,
|
33
31
|
ScoreKwargsTypedDict
|
34
32
|
)
|
@@ -363,7 +361,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
363
361
|
self.set_sample_weight_col(sample_weight_col)
|
364
362
|
self._use_external_memory_version = use_external_memory_version
|
365
363
|
self._batch_size = batch_size
|
366
|
-
deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
|
364
|
+
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
|
367
365
|
|
368
366
|
self._deps = list(deps)
|
369
367
|
|
@@ -699,12 +697,23 @@ class XGBRFClassifier(BaseTransformer):
|
|
699
697
|
autogenerated=self._autogenerated,
|
700
698
|
subproject=_SUBPROJECT,
|
701
699
|
)
|
702
|
-
|
703
|
-
|
704
|
-
expected_output_cols_list=(
|
705
|
-
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
706
|
-
),
|
700
|
+
expected_output_cols = (
|
701
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
707
702
|
)
|
703
|
+
if isinstance(dataset, DataFrame):
|
704
|
+
expected_output_cols, example_output_pd_df = self._align_expected_output(
|
705
|
+
"fit_predict", dataset, expected_output_cols, output_cols_prefix
|
706
|
+
)
|
707
|
+
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
708
|
+
drop_input_cols=self._drop_input_cols,
|
709
|
+
expected_output_cols_list=expected_output_cols,
|
710
|
+
example_output_pd_df=example_output_pd_df,
|
711
|
+
)
|
712
|
+
else:
|
713
|
+
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
714
|
+
drop_input_cols=self._drop_input_cols,
|
715
|
+
expected_output_cols_list=expected_output_cols,
|
716
|
+
)
|
708
717
|
self._sklearn_object = fitted_estimator
|
709
718
|
self._is_fitted = True
|
710
719
|
return output_result
|
@@ -783,12 +792,41 @@ class XGBRFClassifier(BaseTransformer):
|
|
783
792
|
|
784
793
|
return rv
|
785
794
|
|
786
|
-
def
|
787
|
-
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
788
|
-
) -> List[str]:
|
795
|
+
def _align_expected_output(
|
796
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
|
797
|
+
) -> Tuple[List[str], pd.DataFrame]:
|
798
|
+
""" Run 1 line of data with the desired method, and return one tuple that consists of the output column names
|
799
|
+
and output dataframe with 1 line.
|
800
|
+
If the method is fit_predict, run 2 lines of data.
|
801
|
+
"""
|
789
802
|
# in case the inferred output column names dimension is different
|
790
803
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
791
|
-
|
804
|
+
|
805
|
+
# For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
|
806
|
+
# so change the minimum of number of rows to 2
|
807
|
+
num_examples = 2
|
808
|
+
statement_params = telemetry.get_function_usage_statement_params(
|
809
|
+
project=_PROJECT,
|
810
|
+
subproject=_SUBPROJECT,
|
811
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
812
|
+
inspect.currentframe(), XGBRFClassifier.__class__.__name__
|
813
|
+
),
|
814
|
+
api_calls=[Session.call],
|
815
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
816
|
+
)
|
817
|
+
if output_cols_prefix == "fit_predict_":
|
818
|
+
if hasattr(self._sklearn_object, "n_clusters"):
|
819
|
+
# cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
|
820
|
+
num_examples = self._sklearn_object.n_clusters
|
821
|
+
elif hasattr(self._sklearn_object, "min_samples"):
|
822
|
+
# OPTICS default min_samples 5, which requires at least 5 lines of data
|
823
|
+
num_examples = self._sklearn_object.min_samples
|
824
|
+
elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
|
825
|
+
# LocalOutlierFactor expects n_neighbors <= n_samples
|
826
|
+
num_examples = self._sklearn_object.n_neighbors
|
827
|
+
sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
|
828
|
+
else:
|
829
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
|
792
830
|
|
793
831
|
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
794
832
|
# seen during the fit.
|
@@ -800,12 +838,14 @@ class XGBRFClassifier(BaseTransformer):
|
|
800
838
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
801
839
|
if self.sample_weight_col:
|
802
840
|
output_df_columns_set -= set(self.sample_weight_col)
|
841
|
+
|
803
842
|
# if the dimension of inferred output column names is correct; use it
|
804
843
|
if len(expected_output_cols_list) == len(output_df_columns_set):
|
805
|
-
return expected_output_cols_list
|
844
|
+
return expected_output_cols_list, output_df_pd
|
806
845
|
# otherwise, use the sklearn estimator's output
|
807
846
|
else:
|
808
|
-
|
847
|
+
expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
848
|
+
return expected_output_cols_list, output_df_pd[expected_output_cols_list]
|
809
849
|
|
810
850
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
811
851
|
@telemetry.send_api_usage_telemetry(
|
@@ -853,7 +893,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
853
893
|
drop_input_cols=self._drop_input_cols,
|
854
894
|
expected_output_cols_type="float",
|
855
895
|
)
|
856
|
-
expected_output_cols = self.
|
896
|
+
expected_output_cols, _ = self._align_expected_output(
|
857
897
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
858
898
|
)
|
859
899
|
|
@@ -921,7 +961,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
921
961
|
drop_input_cols=self._drop_input_cols,
|
922
962
|
expected_output_cols_type="float",
|
923
963
|
)
|
924
|
-
expected_output_cols = self.
|
964
|
+
expected_output_cols, _ = self._align_expected_output(
|
925
965
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
926
966
|
)
|
927
967
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -984,7 +1024,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
984
1024
|
drop_input_cols=self._drop_input_cols,
|
985
1025
|
expected_output_cols_type="float",
|
986
1026
|
)
|
987
|
-
expected_output_cols = self.
|
1027
|
+
expected_output_cols, _ = self._align_expected_output(
|
988
1028
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
989
1029
|
)
|
990
1030
|
|
@@ -1049,7 +1089,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
1049
1089
|
drop_input_cols = self._drop_input_cols,
|
1050
1090
|
expected_output_cols_type="float",
|
1051
1091
|
)
|
1052
|
-
expected_output_cols = self.
|
1092
|
+
expected_output_cols, _ = self._align_expected_output(
|
1053
1093
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
1054
1094
|
)
|
1055
1095
|
|
@@ -1114,7 +1154,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
1114
1154
|
transform_kwargs = dict(
|
1115
1155
|
session=dataset._session,
|
1116
1156
|
dependencies=self._deps,
|
1117
|
-
score_sproc_imports=['xgboost'],
|
1157
|
+
score_sproc_imports=['xgboost', 'sklearn'],
|
1118
1158
|
)
|
1119
1159
|
elif isinstance(dataset, pd.DataFrame):
|
1120
1160
|
# pandas_handler.score() does not require any extra kwargs.
|
@@ -4,18 +4,17 @@
|
|
4
4
|
#
|
5
5
|
import inspect
|
6
6
|
import os
|
7
|
-
import
|
8
|
-
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
9
|
-
from typing_extensions import TypeGuard
|
7
|
+
from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
|
10
8
|
from uuid import uuid4
|
11
9
|
|
12
10
|
import cloudpickle as cp
|
13
|
-
import pandas as pd
|
14
11
|
import numpy as np
|
12
|
+
import pandas as pd
|
15
13
|
from numpy import typing as npt
|
16
14
|
|
17
15
|
|
18
16
|
import numpy
|
17
|
+
import sklearn
|
19
18
|
import xgboost
|
20
19
|
from sklearn.utils.metaestimators import available_if
|
21
20
|
|
@@ -23,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
|
|
23
22
|
from snowflake.ml._internal import telemetry
|
24
23
|
from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
|
25
24
|
from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
|
26
|
-
from snowflake.ml._internal.utils import
|
25
|
+
from snowflake.ml._internal.utils import identifier
|
27
26
|
from snowflake.snowpark import DataFrame, Session
|
28
27
|
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
29
28
|
from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
|
30
29
|
from snowflake.ml.modeling._internal.transformer_protocols import (
|
31
|
-
ModelTransformHandlers,
|
32
30
|
BatchInferenceKwargsTypedDict,
|
33
31
|
ScoreKwargsTypedDict
|
34
32
|
)
|
@@ -363,7 +361,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
363
361
|
self.set_sample_weight_col(sample_weight_col)
|
364
362
|
self._use_external_memory_version = use_external_memory_version
|
365
363
|
self._batch_size = batch_size
|
366
|
-
deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
|
364
|
+
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
|
367
365
|
|
368
366
|
self._deps = list(deps)
|
369
367
|
|
@@ -699,12 +697,23 @@ class XGBRFRegressor(BaseTransformer):
|
|
699
697
|
autogenerated=self._autogenerated,
|
700
698
|
subproject=_SUBPROJECT,
|
701
699
|
)
|
702
|
-
|
703
|
-
|
704
|
-
expected_output_cols_list=(
|
705
|
-
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
706
|
-
),
|
700
|
+
expected_output_cols = (
|
701
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
707
702
|
)
|
703
|
+
if isinstance(dataset, DataFrame):
|
704
|
+
expected_output_cols, example_output_pd_df = self._align_expected_output(
|
705
|
+
"fit_predict", dataset, expected_output_cols, output_cols_prefix
|
706
|
+
)
|
707
|
+
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
708
|
+
drop_input_cols=self._drop_input_cols,
|
709
|
+
expected_output_cols_list=expected_output_cols,
|
710
|
+
example_output_pd_df=example_output_pd_df,
|
711
|
+
)
|
712
|
+
else:
|
713
|
+
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
714
|
+
drop_input_cols=self._drop_input_cols,
|
715
|
+
expected_output_cols_list=expected_output_cols,
|
716
|
+
)
|
708
717
|
self._sklearn_object = fitted_estimator
|
709
718
|
self._is_fitted = True
|
710
719
|
return output_result
|
@@ -783,12 +792,41 @@ class XGBRFRegressor(BaseTransformer):
|
|
783
792
|
|
784
793
|
return rv
|
785
794
|
|
786
|
-
def
|
787
|
-
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
788
|
-
) -> List[str]:
|
795
|
+
def _align_expected_output(
|
796
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
|
797
|
+
) -> Tuple[List[str], pd.DataFrame]:
|
798
|
+
""" Run 1 line of data with the desired method, and return one tuple that consists of the output column names
|
799
|
+
and output dataframe with 1 line.
|
800
|
+
If the method is fit_predict, run 2 lines of data.
|
801
|
+
"""
|
789
802
|
# in case the inferred output column names dimension is different
|
790
803
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
791
|
-
|
804
|
+
|
805
|
+
# For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
|
806
|
+
# so change the minimum of number of rows to 2
|
807
|
+
num_examples = 2
|
808
|
+
statement_params = telemetry.get_function_usage_statement_params(
|
809
|
+
project=_PROJECT,
|
810
|
+
subproject=_SUBPROJECT,
|
811
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
812
|
+
inspect.currentframe(), XGBRFRegressor.__class__.__name__
|
813
|
+
),
|
814
|
+
api_calls=[Session.call],
|
815
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
816
|
+
)
|
817
|
+
if output_cols_prefix == "fit_predict_":
|
818
|
+
if hasattr(self._sklearn_object, "n_clusters"):
|
819
|
+
# cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
|
820
|
+
num_examples = self._sklearn_object.n_clusters
|
821
|
+
elif hasattr(self._sklearn_object, "min_samples"):
|
822
|
+
# OPTICS default min_samples 5, which requires at least 5 lines of data
|
823
|
+
num_examples = self._sklearn_object.min_samples
|
824
|
+
elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
|
825
|
+
# LocalOutlierFactor expects n_neighbors <= n_samples
|
826
|
+
num_examples = self._sklearn_object.n_neighbors
|
827
|
+
sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
|
828
|
+
else:
|
829
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
|
792
830
|
|
793
831
|
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
794
832
|
# seen during the fit.
|
@@ -800,12 +838,14 @@ class XGBRFRegressor(BaseTransformer):
|
|
800
838
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
801
839
|
if self.sample_weight_col:
|
802
840
|
output_df_columns_set -= set(self.sample_weight_col)
|
841
|
+
|
803
842
|
# if the dimension of inferred output column names is correct; use it
|
804
843
|
if len(expected_output_cols_list) == len(output_df_columns_set):
|
805
|
-
return expected_output_cols_list
|
844
|
+
return expected_output_cols_list, output_df_pd
|
806
845
|
# otherwise, use the sklearn estimator's output
|
807
846
|
else:
|
808
|
-
|
847
|
+
expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
848
|
+
return expected_output_cols_list, output_df_pd[expected_output_cols_list]
|
809
849
|
|
810
850
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
811
851
|
@telemetry.send_api_usage_telemetry(
|
@@ -851,7 +891,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
851
891
|
drop_input_cols=self._drop_input_cols,
|
852
892
|
expected_output_cols_type="float",
|
853
893
|
)
|
854
|
-
expected_output_cols = self.
|
894
|
+
expected_output_cols, _ = self._align_expected_output(
|
855
895
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
856
896
|
)
|
857
897
|
|
@@ -917,7 +957,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
917
957
|
drop_input_cols=self._drop_input_cols,
|
918
958
|
expected_output_cols_type="float",
|
919
959
|
)
|
920
|
-
expected_output_cols = self.
|
960
|
+
expected_output_cols, _ = self._align_expected_output(
|
921
961
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
922
962
|
)
|
923
963
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -980,7 +1020,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
980
1020
|
drop_input_cols=self._drop_input_cols,
|
981
1021
|
expected_output_cols_type="float",
|
982
1022
|
)
|
983
|
-
expected_output_cols = self.
|
1023
|
+
expected_output_cols, _ = self._align_expected_output(
|
984
1024
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
985
1025
|
)
|
986
1026
|
|
@@ -1045,7 +1085,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
1045
1085
|
drop_input_cols = self._drop_input_cols,
|
1046
1086
|
expected_output_cols_type="float",
|
1047
1087
|
)
|
1048
|
-
expected_output_cols = self.
|
1088
|
+
expected_output_cols, _ = self._align_expected_output(
|
1049
1089
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
1050
1090
|
)
|
1051
1091
|
|
@@ -1110,7 +1150,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
1110
1150
|
transform_kwargs = dict(
|
1111
1151
|
session=dataset._session,
|
1112
1152
|
dependencies=self._deps,
|
1113
|
-
score_sproc_imports=['xgboost'],
|
1153
|
+
score_sproc_imports=['xgboost', 'sklearn'],
|
1114
1154
|
)
|
1115
1155
|
elif isinstance(dataset, pd.DataFrame):
|
1116
1156
|
# pandas_handler.score() does not require any extra kwargs.
|
@@ -9,7 +9,7 @@ from snowflake.ml._internal.human_readable_id import hrid_generator
|
|
9
9
|
from snowflake.ml._internal.utils import sql_identifier
|
10
10
|
from snowflake.ml.model import model_signature, type_hints as model_types
|
11
11
|
from snowflake.ml.model._client.model import model_impl, model_version_impl
|
12
|
-
from snowflake.ml.model._client.ops import metadata_ops, model_ops
|
12
|
+
from snowflake.ml.model._client.ops import metadata_ops, model_ops, service_ops
|
13
13
|
from snowflake.ml.model._model_composer import model_composer
|
14
14
|
from snowflake.ml.model._packager.model_meta import model_meta
|
15
15
|
from snowflake.snowpark import session
|
@@ -30,6 +30,9 @@ class ModelManager:
|
|
30
30
|
self._model_ops = model_ops.ModelOperator(
|
31
31
|
session, database_name=self._database_name, schema_name=self._schema_name
|
32
32
|
)
|
33
|
+
self._service_ops = service_ops.ServiceOperator(
|
34
|
+
session, database_name=self._database_name, schema_name=self._schema_name
|
35
|
+
)
|
33
36
|
self._hrid_generator = hrid_generator.HRID16()
|
34
37
|
|
35
38
|
def log_model(
|
@@ -47,6 +50,7 @@ class ModelManager:
|
|
47
50
|
sample_input_data: Optional[model_types.SupportedDataType] = None,
|
48
51
|
code_paths: Optional[List[str]] = None,
|
49
52
|
ext_modules: Optional[List[ModuleType]] = None,
|
53
|
+
model_objective: model_types.ModelObjective = model_types.ModelObjective.UNKNOWN,
|
50
54
|
options: Optional[model_types.ModelSaveOption] = None,
|
51
55
|
statement_params: Optional[Dict[str, Any]] = None,
|
52
56
|
) -> model_version_impl.ModelVersion:
|
@@ -86,6 +90,7 @@ class ModelManager:
|
|
86
90
|
sample_input_data=sample_input_data,
|
87
91
|
code_paths=code_paths,
|
88
92
|
ext_modules=ext_modules,
|
93
|
+
model_objective=model_objective,
|
89
94
|
options=options,
|
90
95
|
statement_params=statement_params,
|
91
96
|
)
|
@@ -105,6 +110,7 @@ class ModelManager:
|
|
105
110
|
sample_input_data: Optional[model_types.SupportedDataType] = None,
|
106
111
|
code_paths: Optional[List[str]] = None,
|
107
112
|
ext_modules: Optional[List[ModuleType]] = None,
|
113
|
+
model_objective: model_types.ModelObjective = model_types.ModelObjective.UNKNOWN,
|
108
114
|
options: Optional[model_types.ModelSaveOption] = None,
|
109
115
|
statement_params: Optional[Dict[str, Any]] = None,
|
110
116
|
) -> model_version_impl.ModelVersion:
|
@@ -153,6 +159,7 @@ class ModelManager:
|
|
153
159
|
code_paths=code_paths,
|
154
160
|
ext_modules=ext_modules,
|
155
161
|
options=options,
|
162
|
+
model_objective=model_objective,
|
156
163
|
)
|
157
164
|
statement_params = telemetry.add_statement_params_custom_tags(
|
158
165
|
statement_params, model_metadata.telemetry_metadata()
|
@@ -173,11 +180,16 @@ class ModelManager:
|
|
173
180
|
)
|
174
181
|
|
175
182
|
mv = model_version_impl.ModelVersion._ref(
|
176
|
-
model_ops.ModelOperator(
|
183
|
+
model_ops=model_ops.ModelOperator(
|
177
184
|
self._model_ops._session,
|
178
185
|
database_name=database_name_id or self._database_name,
|
179
186
|
schema_name=schema_name_id or self._schema_name,
|
180
187
|
),
|
188
|
+
service_ops=service_ops.ServiceOperator(
|
189
|
+
self._service_ops._session,
|
190
|
+
database_name=database_name_id or self._database_name,
|
191
|
+
schema_name=schema_name_id or self._schema_name,
|
192
|
+
),
|
181
193
|
model_name=model_name_id,
|
182
194
|
version_name=version_name_id,
|
183
195
|
)
|
@@ -216,6 +228,11 @@ class ModelManager:
|
|
216
228
|
database_name=database_name_id or self._database_name,
|
217
229
|
schema_name=schema_name_id or self._schema_name,
|
218
230
|
),
|
231
|
+
service_ops=service_ops.ServiceOperator(
|
232
|
+
self._service_ops._session,
|
233
|
+
database_name=database_name_id or self._database_name,
|
234
|
+
schema_name=schema_name_id or self._schema_name,
|
235
|
+
),
|
219
236
|
model_name=model_name_id,
|
220
237
|
)
|
221
238
|
else:
|
@@ -234,6 +251,7 @@ class ModelManager:
|
|
234
251
|
return [
|
235
252
|
model_impl.Model._ref(
|
236
253
|
self._model_ops,
|
254
|
+
service_ops=self._service_ops,
|
237
255
|
model_name=model_name,
|
238
256
|
)
|
239
257
|
for model_name in model_names
|
@@ -576,7 +576,7 @@ fully integrated into the new registry.
|
|
576
576
|
raw_stage_path = uri.get_snowflake_stage_path_from_uri(model_uri)
|
577
577
|
if not raw_stage_path:
|
578
578
|
return None
|
579
|
-
(db, schema, stage, _) = identifier.
|
579
|
+
(db, schema, stage, _) = identifier.parse_snowflake_stage_path(raw_stage_path)
|
580
580
|
return identifier.get_schema_level_object_identifier(db, schema, stage)
|
581
581
|
|
582
582
|
def _list_selected_models(
|
@@ -244,8 +244,7 @@ class Registry:
|
|
244
244
|
warnings.warn(
|
245
245
|
"Models logged specifying `pip_requirements` can not be executed "
|
246
246
|
"in Snowflake Warehouse where all dependencies are required to be retrieved "
|
247
|
-
"from Snowflake Anaconda Channel.
|
248
|
-
"to log model with pip dependencies.",
|
247
|
+
"from Snowflake Anaconda Channel.",
|
249
248
|
category=UserWarning,
|
250
249
|
stacklevel=1,
|
251
250
|
)
|
@@ -0,0 +1,22 @@
|
|
1
|
+
from enum import Enum
|
2
|
+
from typing import Dict
|
3
|
+
|
4
|
+
|
5
|
+
class CreationOption(Enum):
|
6
|
+
FAIL_IF_NOT_EXIST = 1
|
7
|
+
CREATE_IF_NOT_EXIST = 2
|
8
|
+
OR_REPLACE = 3
|
9
|
+
|
10
|
+
|
11
|
+
class CreationMode:
|
12
|
+
def __init__(self, *, if_not_exists: bool = False, or_replace: bool = False) -> None:
|
13
|
+
self.if_not_exists = if_not_exists
|
14
|
+
self.or_replace = or_replace
|
15
|
+
|
16
|
+
def get_ddl_phrases(self) -> Dict[CreationOption, str]:
|
17
|
+
if_not_exists_sql = " IF NOT EXISTS" if self.if_not_exists else ""
|
18
|
+
or_replace_sql = " OR REPLACE" if self.or_replace else ""
|
19
|
+
return {
|
20
|
+
CreationOption.CREATE_IF_NOT_EXIST: if_not_exists_sql,
|
21
|
+
CreationOption.OR_REPLACE: or_replace_sql,
|
22
|
+
}
|
snowflake/ml/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION="1.6.
|
1
|
+
VERSION="1.6.2"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: snowflake-ml-python
|
3
|
-
Version: 1.6.
|
3
|
+
Version: 1.6.2
|
4
4
|
Summary: The machine learning client library that is used for interacting with Snowflake to build machine learning solutions.
|
5
5
|
Author-email: "Snowflake, Inc" <support@snowflake.com>
|
6
6
|
License:
|
@@ -253,7 +253,7 @@ Requires-Dist: snowflake-connector-python[pandas] <4,>=3.5.0
|
|
253
253
|
Requires-Dist: snowflake-snowpark-python <2,>=1.17.0
|
254
254
|
Requires-Dist: sqlparse <1,>=0.4
|
255
255
|
Requires-Dist: typing-extensions <5,>=4.1.0
|
256
|
-
Requires-Dist: xgboost <2,>=1.7.3
|
256
|
+
Requires-Dist: xgboost <2.1,>=1.7.3
|
257
257
|
Provides-Extra: all
|
258
258
|
Requires-Dist: catboost <2,>=1.2.0 ; extra == 'all'
|
259
259
|
Requires-Dist: lightgbm <5,>=3.3.5 ; extra == 'all'
|
@@ -373,7 +373,51 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
|
|
373
373
|
|
374
374
|
# Release History
|
375
375
|
|
376
|
-
## 1.6.
|
376
|
+
## 1.6.2 (TBD)
|
377
|
+
|
378
|
+
### Bug Fixes
|
379
|
+
|
380
|
+
- Modeling: Support XGBoost versions 2.0 and later (up to, but not including, 2.1).
|
381
|
+
|
382
|
+
- Data: Fix multiple epoch iteration over `DataConnector.to_torch_datapipe()` DataPipes.
|
383
|
+
- Generic: Fix a bug where an invalid name passed to an argument that expects a fully qualified name was
|
384
|
+
parsed incorrectly. An exception is now raised instead.
|
385
|
+
- Model Explainability: Handle explanations for multiclass XGBoost classification models
|
386
|
+
- Model Explainability: Workarounds and better error handling for XGB>2.1.0 not working with SHAP==0.42.1
|
387
|
+
|
388
|
+
### New Features
|
389
|
+
|
390
|
+
- Data: Add top-level exports for `DataConnector` and `DataSource` to `snowflake.ml.data`.
|
391
|
+
- Data: Add native batching support via `batch_size` and `drop_last_batch` arguments to `DataConnector.to_torch_dataset()`
|
392
|
+
- Feature Store: update_feature_view() supports taking feature view object as argument.
|
393
|
+
|
394
|
+
### Behavior Changes
|
395
|
+
|
396
|
+
## 1.6.1 (2024-08-12)
|
397
|
+
|
398
|
+
### Bug Fixes
|
399
|
+
|
400
|
+
- Feature Store: Support large metadata blob when generating dataset
|
401
|
+
- Feature Store: Added a hidden knob in FeatureView as kwargs for setting a customized
|
402
|
+
refresh_mode
|
403
|
+
- Registry: Fix an error message in Model Version `run` when `function_name` is not specified and the model has multiple
|
404
|
+
target methods.
|
405
|
+
- Cortex inference: snowflake.cortex.Complete now only uses the REST API for streaming and the use_rest_api_experimental
|
406
|
+
is no longer needed.
|
407
|
+
- Feature Store: Add a new API: FeatureView.list_columns() which lists all column information.
|
408
|
+
- Data: Fix `DataFrame` ingestion with `ArrowIngestor`.
|
409
|
+
|
410
|
+
### New Features
|
411
|
+
|
412
|
+
- Enable `set_params` to set the parameters of the underlying sklearn estimator, if the snowflake-ml model has been fit.
|
413
|
+
- Data: Add `snowflake.ml.data.ingestor_utils` module with utility functions helpful for `DataIngestor` implementations.
|
414
|
+
- Data: Add new `to_torch_dataset()` connector to `DataConnector` to replace deprecated DataPipe.
|
415
|
+
- Registry: The `enable_explainability` option is now set to True by default for XGBoost, LightGBM and CatBoost as a PuPr feature.
|
416
|
+
- Registry: Option to `enable_explainability` when registering SHAP supported sklearn models.
|
417
|
+
|
418
|
+
### Behavior Changes
|
419
|
+
|
420
|
+
## 1.6.0 (2024-07-29)
|
377
421
|
|
378
422
|
### Bug Fixes
|
379
423
|
|
@@ -402,6 +446,14 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
|
|
402
446
|
distributed_hpo_trainer.ENABLE_EFFICIENT_MEMORY_USAGE = False
|
403
447
|
`
|
404
448
|
- Registry: Option to `enable_explainability` when registering LightGBM models as a pre-PuPr feature.
|
449
|
+
- Data: Add new `snowflake.ml.data` preview module which contains data reading utilities like `DataConnector`
|
450
|
+
- `DataConnector` provides efficient connectors from Snowpark `DataFrame`
|
451
|
+
and Snowpark ML `Dataset` to external frameworks like PyTorch, TensorFlow, and Pandas. Create `DataConnector`
|
452
|
+
instances using the classmethod constructors `DataConnector.from_dataset()` and `DataConnector.from_dataframe()`.
|
453
|
+
- Data: Add new `DataConnector.from_sources()` classmethod constructor for constructing from `DataSource` objects.
|
454
|
+
- Data: Add new `ingestor_class` arg to `DataConnector` classmethod constructors for easier `DataIngestor` injection.
|
455
|
+
- Dataset: `DatasetReader` now subclasses new `DataConnector` class.
|
456
|
+
- Add optional `limit` arg to `DatasetReader.to_pandas()`
|
405
457
|
|
406
458
|
### Behavior Changes
|
407
459
|
|