snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/xgboost/xgb_regressor.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -26,6 +27,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -390,7 +392,6 @@ class XGBRegressor(BaseTransformer):
         **kwargs,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -411,6 +412,15 @@ class XGBRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -489,7 +499,7 @@ class XGBRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -502,11 +512,12 @@ class XGBRegressor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -532,6 +543,7 @@ class XGBRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -540,7 +552,8 @@ class XGBRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -607,15 +620,15 @@ class XGBRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -625,7 +638,7 @@ class XGBRegressor(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -671,7 +684,7 @@ class XGBRegressor(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -763,7 +776,7 @@ class XGBRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -930,11 +943,18 @@ class XGBRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -1005,10 +1025,10 @@ class XGBRegressor(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1233,7 +1253,7 @@ class XGBRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1247,8 +1267,9 @@ class XGBRegressor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1274,6 +1295,7 @@ class XGBRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1281,7 +1303,8 @@ class XGBRegressor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1331,14 +1354,14 @@ class XGBRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
        )
 
         cleanup_temp_files([local_score_file_name])
@@ -1356,18 +1379,20 @@ class XGBRegressor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                        ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
snowflake/ml/modeling/xgboost/xgbrf_classifier.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -26,6 +27,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -392,7 +394,6 @@ class XGBRFClassifier(BaseTransformer):
         **kwargs,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -416,6 +417,15 @@ class XGBRFClassifier(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -494,7 +504,7 @@ class XGBRFClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -507,11 +517,12 @@ class XGBRFClassifier(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -537,6 +548,7 @@ class XGBRFClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -545,7 +557,8 @@ class XGBRFClassifier(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -612,15 +625,15 @@ class XGBRFClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -630,7 +643,7 @@ class XGBRFClassifier(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -676,7 +689,7 @@ class XGBRFClassifier(BaseTransformer):
 
         # Register vectorized UDF for batch inference
        batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -768,7 +781,7 @@ class XGBRFClassifier(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -935,11 +948,18 @@ class XGBRFClassifier(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -1010,10 +1030,10 @@ class XGBRFClassifier(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1242,7 +1262,7 @@ class XGBRFClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1256,8 +1276,9 @@ class XGBRFClassifier(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1283,6 +1304,7 @@ class XGBRFClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1290,7 +1312,8 @@ class XGBRFClassifier(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1340,14 +1363,14 @@ class XGBRFClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1365,18 +1388,20 @@ class XGBRFClassifier(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                        ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: