snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +3 -3
- snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
- snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
- snowflake/ml/_internal/telemetry.py +11 -2
- snowflake/ml/_internal/utils/formatting.py +1 -1
- snowflake/ml/feature_store/feature_store.py +15 -106
- snowflake/ml/fileset/sfcfs.py +4 -3
- snowflake/ml/fileset/stage_fs.py +18 -0
- snowflake/ml/model/_api.py +9 -9
- snowflake/ml/model/_client/model/model_version_impl.py +20 -15
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
- snowflake/ml/model/_model_composer/model_composer.py +10 -8
- snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
- snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
- snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
- snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
- snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
- snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
- snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
- snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
- snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_packager.py +8 -6
- snowflake/ml/model/custom_model.py +3 -1
- snowflake/ml/model/type_hints.py +13 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
- snowflake/ml/modeling/_internal/model_specifications.py +3 -1
- snowflake/ml/modeling/_internal/model_trainer.py +2 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
- snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
- snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
- snowflake/ml/modeling/cluster/birch.py +33 -61
- snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
- snowflake/ml/modeling/cluster/dbscan.py +33 -61
- snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
- snowflake/ml/modeling/cluster/k_means.py +33 -61
- snowflake/ml/modeling/cluster/mean_shift.py +33 -61
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
- snowflake/ml/modeling/cluster/optics.py +33 -61
- snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
- snowflake/ml/modeling/compose/column_transformer.py +33 -61
- snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
- snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
- snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
- snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
- snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
- snowflake/ml/modeling/covariance/oas.py +33 -61
- snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
- snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
- snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
- snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
- snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/pca.py +33 -61
- snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
- snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
- snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
- snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
- snowflake/ml/modeling/framework/base.py +55 -5
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
- snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
- snowflake/ml/modeling/impute/knn_imputer.py +33 -61
- snowflake/ml/modeling/impute/missing_indicator.py +33 -61
- snowflake/ml/modeling/impute/simple_imputer.py +4 -15
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
- snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/lars.py +33 -61
- snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
- snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/perceptron.py +33 -61
- snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ridge.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
- snowflake/ml/modeling/manifold/isomap.py +33 -61
- snowflake/ml/modeling/manifold/mds.py +33 -61
- snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
- snowflake/ml/modeling/manifold/tsne.py +33 -61
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
- snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
- snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
- snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
- snowflake/ml/modeling/svm/linear_svc.py +33 -61
- snowflake/ml/modeling/svm/linear_svr.py +33 -61
- snowflake/ml/modeling/svm/nu_svc.py +33 -61
- snowflake/ml/modeling/svm/nu_svr.py +33 -61
- snowflake/ml/modeling/svm/svc.py +33 -61
- snowflake/ml/modeling/svm/svr.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
- snowflake/ml/registry/_manager/model_manager.py +6 -2
- snowflake/ml/registry/model_registry.py +100 -27
- snowflake/ml/registry/registry.py +6 -2
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
@@ -487,18 +487,24 @@ class XGBRFClassifier(BaseTransformer):
|
|
487
487
|
self._get_model_signatures(dataset)
|
488
488
|
return self
|
489
489
|
|
490
|
-
def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
|
491
|
-
if self._drop_input_cols:
|
492
|
-
return []
|
493
|
-
else:
|
494
|
-
return list(set(dataset.columns) - set(self.output_cols))
|
495
|
-
|
496
490
|
def _batch_inference_validate_snowpark(
|
497
491
|
self,
|
498
492
|
dataset: DataFrame,
|
499
493
|
inference_method: str,
|
500
494
|
) -> List[str]:
|
501
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
495
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe and
|
496
|
+
return the available package that exists in the snowflake anaconda channel
|
497
|
+
|
498
|
+
Args:
|
499
|
+
dataset: snowpark dataframe
|
500
|
+
inference_method: the inference method such as predict, score...
|
501
|
+
|
502
|
+
Raises:
|
503
|
+
SnowflakeMLException: If the estimator is not fitted, raise error
|
504
|
+
SnowflakeMLException: If the session is None, raise error
|
505
|
+
|
506
|
+
Returns:
|
507
|
+
A list of available package that exists in the snowflake anaconda channel
|
502
508
|
"""
|
503
509
|
if not self._is_fitted:
|
504
510
|
raise exceptions.SnowflakeMLException(
|
@@ -572,7 +578,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
572
578
|
transform_kwargs = dict(
|
573
579
|
session = dataset._session,
|
574
580
|
dependencies = self._deps,
|
575
|
-
|
581
|
+
drop_input_cols = self._drop_input_cols,
|
576
582
|
expected_output_cols_type = expected_type_inferred,
|
577
583
|
)
|
578
584
|
|
@@ -632,16 +638,16 @@ class XGBRFClassifier(BaseTransformer):
|
|
632
638
|
# from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
|
633
639
|
# based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
|
634
640
|
# each row containing a list of values.
|
635
|
-
expected_dtype = "
|
641
|
+
expected_dtype = "array"
|
636
642
|
|
637
643
|
# If we were unable to assign a type to this transform in the factory, infer the type here.
|
638
644
|
if expected_dtype == "":
|
639
|
-
# If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
|
645
|
+
# If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
|
640
646
|
if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
|
641
|
-
expected_dtype = "
|
642
|
-
# If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
|
647
|
+
expected_dtype = "array"
|
648
|
+
# If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
|
643
649
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
644
|
-
expected_dtype = "
|
650
|
+
expected_dtype = "array"
|
645
651
|
else:
|
646
652
|
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
647
653
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
@@ -659,7 +665,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
659
665
|
transform_kwargs = dict(
|
660
666
|
session = dataset._session,
|
661
667
|
dependencies = self._deps,
|
662
|
-
|
668
|
+
drop_input_cols = self._drop_input_cols,
|
663
669
|
expected_output_cols_type = expected_dtype,
|
664
670
|
)
|
665
671
|
|
@@ -710,7 +716,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
710
716
|
subproject=_SUBPROJECT,
|
711
717
|
)
|
712
718
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
713
|
-
|
719
|
+
drop_input_cols=self._drop_input_cols,
|
714
720
|
expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
|
715
721
|
)
|
716
722
|
self._sklearn_object = fitted_estimator
|
@@ -728,44 +734,6 @@ class XGBRFClassifier(BaseTransformer):
|
|
728
734
|
assert self._sklearn_object is not None
|
729
735
|
return self._sklearn_object.embedding_
|
730
736
|
|
731
|
-
|
732
|
-
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
733
|
-
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
734
|
-
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
735
|
-
"""
|
736
|
-
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
737
|
-
if output_cols:
|
738
|
-
output_cols = [
|
739
|
-
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
740
|
-
for c in output_cols
|
741
|
-
]
|
742
|
-
elif getattr(self._sklearn_object, "classes_", None) is None:
|
743
|
-
output_cols = [output_cols_prefix]
|
744
|
-
elif self._sklearn_object is not None:
|
745
|
-
classes = self._sklearn_object.classes_
|
746
|
-
if isinstance(classes, numpy.ndarray):
|
747
|
-
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
748
|
-
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
749
|
-
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
750
|
-
output_cols = []
|
751
|
-
for i, cl in enumerate(classes):
|
752
|
-
# For binary classification, there is only one output column for each class
|
753
|
-
# ndarray as the two classes are complementary.
|
754
|
-
if len(cl) == 2:
|
755
|
-
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
756
|
-
else:
|
757
|
-
output_cols.extend([
|
758
|
-
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
759
|
-
])
|
760
|
-
else:
|
761
|
-
output_cols = []
|
762
|
-
|
763
|
-
# Make sure column names are valid snowflake identifiers.
|
764
|
-
assert output_cols is not None # Make MyPy happy
|
765
|
-
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
766
|
-
|
767
|
-
return rv
|
768
|
-
|
769
737
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
770
738
|
@telemetry.send_api_usage_telemetry(
|
771
739
|
project=_PROJECT,
|
@@ -807,7 +775,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
807
775
|
transform_kwargs = dict(
|
808
776
|
session=dataset._session,
|
809
777
|
dependencies=self._deps,
|
810
|
-
|
778
|
+
drop_input_cols = self._drop_input_cols,
|
811
779
|
expected_output_cols_type="float",
|
812
780
|
)
|
813
781
|
|
@@ -874,7 +842,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
874
842
|
transform_kwargs = dict(
|
875
843
|
session=dataset._session,
|
876
844
|
dependencies=self._deps,
|
877
|
-
|
845
|
+
drop_input_cols = self._drop_input_cols,
|
878
846
|
expected_output_cols_type="float",
|
879
847
|
)
|
880
848
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -935,7 +903,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
935
903
|
transform_kwargs = dict(
|
936
904
|
session=dataset._session,
|
937
905
|
dependencies=self._deps,
|
938
|
-
|
906
|
+
drop_input_cols = self._drop_input_cols,
|
939
907
|
expected_output_cols_type="float",
|
940
908
|
)
|
941
909
|
|
@@ -1000,7 +968,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
1000
968
|
transform_kwargs = dict(
|
1001
969
|
session=dataset._session,
|
1002
970
|
dependencies=self._deps,
|
1003
|
-
|
971
|
+
drop_input_cols = self._drop_input_cols,
|
1004
972
|
expected_output_cols_type="float",
|
1005
973
|
)
|
1006
974
|
|
@@ -1056,13 +1024,17 @@ class XGBRFClassifier(BaseTransformer):
|
|
1056
1024
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
1057
1025
|
|
1058
1026
|
if isinstance(dataset, DataFrame):
|
1027
|
+
self._deps = self._batch_inference_validate_snowpark(
|
1028
|
+
dataset=dataset,
|
1029
|
+
inference_method="score",
|
1030
|
+
)
|
1059
1031
|
selected_cols = self._get_active_columns()
|
1060
1032
|
if len(selected_cols) > 0:
|
1061
1033
|
dataset = dataset.select(selected_cols)
|
1062
1034
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
1063
1035
|
transform_kwargs = dict(
|
1064
1036
|
session=dataset._session,
|
1065
|
-
dependencies=["snowflake-snowpark-python"] + self.
|
1037
|
+
dependencies=["snowflake-snowpark-python"] + self._deps,
|
1066
1038
|
score_sproc_imports=['xgboost'],
|
1067
1039
|
)
|
1068
1040
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -1136,9 +1108,9 @@ class XGBRFClassifier(BaseTransformer):
|
|
1136
1108
|
transform_kwargs = dict(
|
1137
1109
|
session = dataset._session,
|
1138
1110
|
dependencies = self._deps,
|
1139
|
-
|
1140
|
-
expected_output_cols_type
|
1141
|
-
n_neighbors =
|
1111
|
+
drop_input_cols = self._drop_input_cols,
|
1112
|
+
expected_output_cols_type="array",
|
1113
|
+
n_neighbors = n_neighbors,
|
1142
1114
|
return_distance = return_distance
|
1143
1115
|
)
|
1144
1116
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -487,18 +487,24 @@ class XGBRFRegressor(BaseTransformer):
|
|
487
487
|
self._get_model_signatures(dataset)
|
488
488
|
return self
|
489
489
|
|
490
|
-
def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
|
491
|
-
if self._drop_input_cols:
|
492
|
-
return []
|
493
|
-
else:
|
494
|
-
return list(set(dataset.columns) - set(self.output_cols))
|
495
|
-
|
496
490
|
def _batch_inference_validate_snowpark(
|
497
491
|
self,
|
498
492
|
dataset: DataFrame,
|
499
493
|
inference_method: str,
|
500
494
|
) -> List[str]:
|
501
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
495
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe and
|
496
|
+
return the available package that exists in the snowflake anaconda channel
|
497
|
+
|
498
|
+
Args:
|
499
|
+
dataset: snowpark dataframe
|
500
|
+
inference_method: the inference method such as predict, score...
|
501
|
+
|
502
|
+
Raises:
|
503
|
+
SnowflakeMLException: If the estimator is not fitted, raise error
|
504
|
+
SnowflakeMLException: If the session is None, raise error
|
505
|
+
|
506
|
+
Returns:
|
507
|
+
A list of available package that exists in the snowflake anaconda channel
|
502
508
|
"""
|
503
509
|
if not self._is_fitted:
|
504
510
|
raise exceptions.SnowflakeMLException(
|
@@ -572,7 +578,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
572
578
|
transform_kwargs = dict(
|
573
579
|
session = dataset._session,
|
574
580
|
dependencies = self._deps,
|
575
|
-
|
581
|
+
drop_input_cols = self._drop_input_cols,
|
576
582
|
expected_output_cols_type = expected_type_inferred,
|
577
583
|
)
|
578
584
|
|
@@ -632,16 +638,16 @@ class XGBRFRegressor(BaseTransformer):
|
|
632
638
|
# from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
|
633
639
|
# based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
|
634
640
|
# each row containing a list of values.
|
635
|
-
expected_dtype = "
|
641
|
+
expected_dtype = "array"
|
636
642
|
|
637
643
|
# If we were unable to assign a type to this transform in the factory, infer the type here.
|
638
644
|
if expected_dtype == "":
|
639
|
-
# If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
|
645
|
+
# If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
|
640
646
|
if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
|
641
|
-
expected_dtype = "
|
642
|
-
# If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
|
647
|
+
expected_dtype = "array"
|
648
|
+
# If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
|
643
649
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
644
|
-
expected_dtype = "
|
650
|
+
expected_dtype = "array"
|
645
651
|
else:
|
646
652
|
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
647
653
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
@@ -659,7 +665,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
659
665
|
transform_kwargs = dict(
|
660
666
|
session = dataset._session,
|
661
667
|
dependencies = self._deps,
|
662
|
-
|
668
|
+
drop_input_cols = self._drop_input_cols,
|
663
669
|
expected_output_cols_type = expected_dtype,
|
664
670
|
)
|
665
671
|
|
@@ -710,7 +716,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
710
716
|
subproject=_SUBPROJECT,
|
711
717
|
)
|
712
718
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
713
|
-
|
719
|
+
drop_input_cols=self._drop_input_cols,
|
714
720
|
expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
|
715
721
|
)
|
716
722
|
self._sklearn_object = fitted_estimator
|
@@ -728,44 +734,6 @@ class XGBRFRegressor(BaseTransformer):
|
|
728
734
|
assert self._sklearn_object is not None
|
729
735
|
return self._sklearn_object.embedding_
|
730
736
|
|
731
|
-
|
732
|
-
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
733
|
-
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
734
|
-
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
735
|
-
"""
|
736
|
-
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
737
|
-
if output_cols:
|
738
|
-
output_cols = [
|
739
|
-
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
740
|
-
for c in output_cols
|
741
|
-
]
|
742
|
-
elif getattr(self._sklearn_object, "classes_", None) is None:
|
743
|
-
output_cols = [output_cols_prefix]
|
744
|
-
elif self._sklearn_object is not None:
|
745
|
-
classes = self._sklearn_object.classes_
|
746
|
-
if isinstance(classes, numpy.ndarray):
|
747
|
-
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
748
|
-
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
749
|
-
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
750
|
-
output_cols = []
|
751
|
-
for i, cl in enumerate(classes):
|
752
|
-
# For binary classification, there is only one output column for each class
|
753
|
-
# ndarray as the two classes are complementary.
|
754
|
-
if len(cl) == 2:
|
755
|
-
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
756
|
-
else:
|
757
|
-
output_cols.extend([
|
758
|
-
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
759
|
-
])
|
760
|
-
else:
|
761
|
-
output_cols = []
|
762
|
-
|
763
|
-
# Make sure column names are valid snowflake identifiers.
|
764
|
-
assert output_cols is not None # Make MyPy happy
|
765
|
-
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
766
|
-
|
767
|
-
return rv
|
768
|
-
|
769
737
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
770
738
|
@telemetry.send_api_usage_telemetry(
|
771
739
|
project=_PROJECT,
|
@@ -805,7 +773,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
805
773
|
transform_kwargs = dict(
|
806
774
|
session=dataset._session,
|
807
775
|
dependencies=self._deps,
|
808
|
-
|
776
|
+
drop_input_cols = self._drop_input_cols,
|
809
777
|
expected_output_cols_type="float",
|
810
778
|
)
|
811
779
|
|
@@ -870,7 +838,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
870
838
|
transform_kwargs = dict(
|
871
839
|
session=dataset._session,
|
872
840
|
dependencies=self._deps,
|
873
|
-
|
841
|
+
drop_input_cols = self._drop_input_cols,
|
874
842
|
expected_output_cols_type="float",
|
875
843
|
)
|
876
844
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -931,7 +899,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
931
899
|
transform_kwargs = dict(
|
932
900
|
session=dataset._session,
|
933
901
|
dependencies=self._deps,
|
934
|
-
|
902
|
+
drop_input_cols = self._drop_input_cols,
|
935
903
|
expected_output_cols_type="float",
|
936
904
|
)
|
937
905
|
|
@@ -996,7 +964,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
996
964
|
transform_kwargs = dict(
|
997
965
|
session=dataset._session,
|
998
966
|
dependencies=self._deps,
|
999
|
-
|
967
|
+
drop_input_cols = self._drop_input_cols,
|
1000
968
|
expected_output_cols_type="float",
|
1001
969
|
)
|
1002
970
|
|
@@ -1052,13 +1020,17 @@ class XGBRFRegressor(BaseTransformer):
|
|
1052
1020
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
1053
1021
|
|
1054
1022
|
if isinstance(dataset, DataFrame):
|
1023
|
+
self._deps = self._batch_inference_validate_snowpark(
|
1024
|
+
dataset=dataset,
|
1025
|
+
inference_method="score",
|
1026
|
+
)
|
1055
1027
|
selected_cols = self._get_active_columns()
|
1056
1028
|
if len(selected_cols) > 0:
|
1057
1029
|
dataset = dataset.select(selected_cols)
|
1058
1030
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
1059
1031
|
transform_kwargs = dict(
|
1060
1032
|
session=dataset._session,
|
1061
|
-
dependencies=["snowflake-snowpark-python"] + self.
|
1033
|
+
dependencies=["snowflake-snowpark-python"] + self._deps,
|
1062
1034
|
score_sproc_imports=['xgboost'],
|
1063
1035
|
)
|
1064
1036
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -1132,9 +1104,9 @@ class XGBRFRegressor(BaseTransformer):
|
|
1132
1104
|
transform_kwargs = dict(
|
1133
1105
|
session = dataset._session,
|
1134
1106
|
dependencies = self._deps,
|
1135
|
-
|
1136
|
-
expected_output_cols_type
|
1137
|
-
n_neighbors =
|
1107
|
+
drop_input_cols = self._drop_input_cols,
|
1108
|
+
expected_output_cols_type="array",
|
1109
|
+
n_neighbors = n_neighbors,
|
1138
1110
|
return_distance = return_distance
|
1139
1111
|
)
|
1140
1112
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -4,6 +4,7 @@ from typing import Any, Dict, List, Optional
|
|
4
4
|
import pandas as pd
|
5
5
|
from absl.logging import logging
|
6
6
|
|
7
|
+
from snowflake.ml._internal.human_readable_id import hrid_generator
|
7
8
|
from snowflake.ml._internal.utils import sql_identifier
|
8
9
|
from snowflake.ml.model import model_signature, type_hints as model_types
|
9
10
|
from snowflake.ml.model._client.model import model_impl, model_version_impl
|
@@ -27,13 +28,14 @@ class ModelManager:
|
|
27
28
|
self._model_ops = model_ops.ModelOperator(
|
28
29
|
session, database_name=self._database_name, schema_name=self._schema_name
|
29
30
|
)
|
31
|
+
self._hrid_generator = hrid_generator.HRID16()
|
30
32
|
|
31
33
|
def log_model(
|
32
34
|
self,
|
33
35
|
model: model_types.SupportedModelType,
|
34
36
|
*,
|
35
37
|
model_name: str,
|
36
|
-
version_name: str,
|
38
|
+
version_name: Optional[str] = None,
|
37
39
|
comment: Optional[str] = None,
|
38
40
|
metrics: Optional[Dict[str, Any]] = None,
|
39
41
|
conda_dependencies: Optional[List[str]] = None,
|
@@ -48,6 +50,8 @@ class ModelManager:
|
|
48
50
|
) -> model_version_impl.ModelVersion:
|
49
51
|
model_name_id = sql_identifier.SqlIdentifier(model_name)
|
50
52
|
|
53
|
+
if not version_name:
|
54
|
+
version_name = self._hrid_generator.generate()[1]
|
51
55
|
version_name_id = sql_identifier.SqlIdentifier(version_name)
|
52
56
|
|
53
57
|
if self._model_ops.validate_existence(
|
@@ -68,7 +72,7 @@ class ModelManager:
|
|
68
72
|
name=model_name_id.resolved(),
|
69
73
|
model=model,
|
70
74
|
signatures=signatures,
|
71
|
-
|
75
|
+
sample_input_data=sample_input_data,
|
72
76
|
conda_dependencies=conda_dependencies,
|
73
77
|
pip_requirements=pip_requirements,
|
74
78
|
python_version=python_version,
|