snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -360,7 +362,6 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
360
362
|
sample_weight_col: Optional[str] = None,
|
361
363
|
) -> None:
|
362
364
|
super().__init__()
|
363
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
364
365
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
365
366
|
|
366
367
|
self._deps = list(deps)
|
@@ -399,6 +400,15 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
399
400
|
self.set_drop_input_cols(drop_input_cols)
|
400
401
|
self.set_sample_weight_col(sample_weight_col)
|
401
402
|
|
403
|
+
def _get_rand_id(self) -> str:
|
404
|
+
"""
|
405
|
+
Generate random id to be used in sproc and stage names.
|
406
|
+
|
407
|
+
Returns:
|
408
|
+
Random id string usable in sproc, table, and stage names.
|
409
|
+
"""
|
410
|
+
return str(uuid4()).replace("-", "_").upper()
|
411
|
+
|
402
412
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
403
413
|
"""
|
404
414
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -477,7 +487,7 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
477
487
|
cp.dump(self._sklearn_object, local_transform_file)
|
478
488
|
|
479
489
|
# Create temp stage to run fit.
|
480
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
490
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
481
491
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
482
492
|
SqlResultValidator(
|
483
493
|
session=session,
|
@@ -490,11 +500,12 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
490
500
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
491
501
|
).validate()
|
492
502
|
|
493
|
-
|
503
|
+
# Use posixpath to construct stage paths
|
504
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
505
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
494
506
|
local_result_file_name = get_temp_file_path()
|
495
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
496
507
|
|
497
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
508
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
498
509
|
statement_params = telemetry.get_function_usage_statement_params(
|
499
510
|
project=_PROJECT,
|
500
511
|
subproject=_SUBPROJECT,
|
@@ -520,6 +531,7 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
520
531
|
replace=True,
|
521
532
|
session=session,
|
522
533
|
statement_params=statement_params,
|
534
|
+
anonymous=True
|
523
535
|
)
|
524
536
|
def fit_wrapper_sproc(
|
525
537
|
session: Session,
|
@@ -528,7 +540,8 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
528
540
|
stage_result_file_name: str,
|
529
541
|
input_cols: List[str],
|
530
542
|
label_cols: List[str],
|
531
|
-
sample_weight_col: Optional[str]
|
543
|
+
sample_weight_col: Optional[str],
|
544
|
+
statement_params: Dict[str, str]
|
532
545
|
) -> str:
|
533
546
|
import cloudpickle as cp
|
534
547
|
import numpy as np
|
@@ -595,15 +608,15 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
595
608
|
api_calls=[Session.call],
|
596
609
|
custom_tags=dict([("autogen", True)]),
|
597
610
|
)
|
598
|
-
sproc_export_file_name =
|
599
|
-
|
611
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
612
|
+
session,
|
600
613
|
query,
|
601
614
|
stage_transform_file_name,
|
602
615
|
stage_result_file_name,
|
603
616
|
identifier.get_unescaped_names(self.input_cols),
|
604
617
|
identifier.get_unescaped_names(self.label_cols),
|
605
618
|
identifier.get_unescaped_names(self.sample_weight_col),
|
606
|
-
statement_params
|
619
|
+
statement_params,
|
607
620
|
)
|
608
621
|
|
609
622
|
if "|" in sproc_export_file_name:
|
@@ -613,7 +626,7 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
613
626
|
print("\n".join(fields[1:]))
|
614
627
|
|
615
628
|
session.file.get(
|
616
|
-
|
629
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
617
630
|
local_result_file_name,
|
618
631
|
statement_params=statement_params
|
619
632
|
)
|
@@ -659,7 +672,7 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
659
672
|
|
660
673
|
# Register vectorized UDF for batch inference
|
661
674
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
662
|
-
safe_id=self.
|
675
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
663
676
|
|
664
677
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
665
678
|
# will try to pickle all of self which fails.
|
@@ -751,7 +764,7 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
751
764
|
return transformed_pandas_df.to_dict("records")
|
752
765
|
|
753
766
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
754
|
-
safe_id=self.
|
767
|
+
safe_id=self._get_rand_id()
|
755
768
|
)
|
756
769
|
|
757
770
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -918,11 +931,18 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
918
931
|
Transformed dataset.
|
919
932
|
"""
|
920
933
|
if isinstance(dataset, DataFrame):
|
934
|
+
expected_type_inferred = ""
|
935
|
+
# when it is classifier, infer the datatype from label columns
|
936
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
937
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
938
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
939
|
+
)
|
940
|
+
|
921
941
|
output_df = self._batch_inference(
|
922
942
|
dataset=dataset,
|
923
943
|
inference_method="predict",
|
924
944
|
expected_output_cols_list=self.output_cols,
|
925
|
-
expected_output_cols_type=
|
945
|
+
expected_output_cols_type=expected_type_inferred,
|
926
946
|
)
|
927
947
|
elif isinstance(dataset, pd.DataFrame):
|
928
948
|
output_df = self._sklearn_inference(
|
@@ -993,10 +1013,10 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
993
1013
|
|
994
1014
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
995
1015
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
996
|
-
Returns
|
1016
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
997
1017
|
"""
|
998
1018
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
999
|
-
return []
|
1019
|
+
return [output_cols_prefix]
|
1000
1020
|
|
1001
1021
|
classes = self._sklearn_object.classes_
|
1002
1022
|
if isinstance(classes, numpy.ndarray):
|
@@ -1227,7 +1247,7 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
1227
1247
|
cp.dump(self._sklearn_object, local_score_file)
|
1228
1248
|
|
1229
1249
|
# Create temp stage to run score.
|
1230
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1250
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1231
1251
|
session = dataset._session
|
1232
1252
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1233
1253
|
SqlResultValidator(
|
@@ -1241,8 +1261,9 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
1241
1261
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1242
1262
|
).validate()
|
1243
1263
|
|
1244
|
-
|
1245
|
-
|
1264
|
+
# Use posixpath to construct stage paths
|
1265
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1266
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1246
1267
|
statement_params = telemetry.get_function_usage_statement_params(
|
1247
1268
|
project=_PROJECT,
|
1248
1269
|
subproject=_SUBPROJECT,
|
@@ -1268,6 +1289,7 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
1268
1289
|
replace=True,
|
1269
1290
|
session=session,
|
1270
1291
|
statement_params=statement_params,
|
1292
|
+
anonymous=True
|
1271
1293
|
)
|
1272
1294
|
def score_wrapper_sproc(
|
1273
1295
|
session: Session,
|
@@ -1275,7 +1297,8 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
1275
1297
|
stage_score_file_name: str,
|
1276
1298
|
input_cols: List[str],
|
1277
1299
|
label_cols: List[str],
|
1278
|
-
sample_weight_col: Optional[str]
|
1300
|
+
sample_weight_col: Optional[str],
|
1301
|
+
statement_params: Dict[str, str]
|
1279
1302
|
) -> float:
|
1280
1303
|
import cloudpickle as cp
|
1281
1304
|
import numpy as np
|
@@ -1325,14 +1348,14 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
1325
1348
|
api_calls=[Session.call],
|
1326
1349
|
custom_tags=dict([("autogen", True)]),
|
1327
1350
|
)
|
1328
|
-
score =
|
1329
|
-
|
1351
|
+
score = score_wrapper_sproc(
|
1352
|
+
session,
|
1330
1353
|
query,
|
1331
1354
|
stage_score_file_name,
|
1332
1355
|
identifier.get_unescaped_names(self.input_cols),
|
1333
1356
|
identifier.get_unescaped_names(self.label_cols),
|
1334
1357
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1335
|
-
statement_params
|
1358
|
+
statement_params,
|
1336
1359
|
)
|
1337
1360
|
|
1338
1361
|
cleanup_temp_files([local_score_file_name])
|
@@ -1350,18 +1373,20 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
1350
1373
|
if self._sklearn_object._estimator_type == 'classifier':
|
1351
1374
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1352
1375
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1353
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1376
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1377
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1354
1378
|
# For regressor, the type of predict is float64
|
1355
1379
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1356
1380
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1357
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1358
|
-
|
1381
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1382
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1359
1383
|
for prob_func in PROB_FUNCTIONS:
|
1360
1384
|
if hasattr(self, prob_func):
|
1361
1385
|
output_cols_prefix: str = f"{prob_func}_"
|
1362
1386
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1363
1387
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1364
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1388
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1389
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1365
1390
|
|
1366
1391
|
@property
|
1367
1392
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -368,7 +370,6 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
368
370
|
sample_weight_col: Optional[str] = None,
|
369
371
|
) -> None:
|
370
372
|
super().__init__()
|
371
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
372
373
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
373
374
|
|
374
375
|
self._deps = list(deps)
|
@@ -408,6 +409,15 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
408
409
|
self.set_drop_input_cols(drop_input_cols)
|
409
410
|
self.set_sample_weight_col(sample_weight_col)
|
410
411
|
|
412
|
+
def _get_rand_id(self) -> str:
|
413
|
+
"""
|
414
|
+
Generate random id to be used in sproc and stage names.
|
415
|
+
|
416
|
+
Returns:
|
417
|
+
Random id string usable in sproc, table, and stage names.
|
418
|
+
"""
|
419
|
+
return str(uuid4()).replace("-", "_").upper()
|
420
|
+
|
411
421
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
412
422
|
"""
|
413
423
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -486,7 +496,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
486
496
|
cp.dump(self._sklearn_object, local_transform_file)
|
487
497
|
|
488
498
|
# Create temp stage to run fit.
|
489
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
499
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
490
500
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
491
501
|
SqlResultValidator(
|
492
502
|
session=session,
|
@@ -499,11 +509,12 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
499
509
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
500
510
|
).validate()
|
501
511
|
|
502
|
-
|
512
|
+
# Use posixpath to construct stage paths
|
513
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
514
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
503
515
|
local_result_file_name = get_temp_file_path()
|
504
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
505
516
|
|
506
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
517
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
507
518
|
statement_params = telemetry.get_function_usage_statement_params(
|
508
519
|
project=_PROJECT,
|
509
520
|
subproject=_SUBPROJECT,
|
@@ -529,6 +540,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
529
540
|
replace=True,
|
530
541
|
session=session,
|
531
542
|
statement_params=statement_params,
|
543
|
+
anonymous=True
|
532
544
|
)
|
533
545
|
def fit_wrapper_sproc(
|
534
546
|
session: Session,
|
@@ -537,7 +549,8 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
537
549
|
stage_result_file_name: str,
|
538
550
|
input_cols: List[str],
|
539
551
|
label_cols: List[str],
|
540
|
-
sample_weight_col: Optional[str]
|
552
|
+
sample_weight_col: Optional[str],
|
553
|
+
statement_params: Dict[str, str]
|
541
554
|
) -> str:
|
542
555
|
import cloudpickle as cp
|
543
556
|
import numpy as np
|
@@ -604,15 +617,15 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
604
617
|
api_calls=[Session.call],
|
605
618
|
custom_tags=dict([("autogen", True)]),
|
606
619
|
)
|
607
|
-
sproc_export_file_name =
|
608
|
-
|
620
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
621
|
+
session,
|
609
622
|
query,
|
610
623
|
stage_transform_file_name,
|
611
624
|
stage_result_file_name,
|
612
625
|
identifier.get_unescaped_names(self.input_cols),
|
613
626
|
identifier.get_unescaped_names(self.label_cols),
|
614
627
|
identifier.get_unescaped_names(self.sample_weight_col),
|
615
|
-
statement_params
|
628
|
+
statement_params,
|
616
629
|
)
|
617
630
|
|
618
631
|
if "|" in sproc_export_file_name:
|
@@ -622,7 +635,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
622
635
|
print("\n".join(fields[1:]))
|
623
636
|
|
624
637
|
session.file.get(
|
625
|
-
|
638
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
626
639
|
local_result_file_name,
|
627
640
|
statement_params=statement_params
|
628
641
|
)
|
@@ -668,7 +681,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
668
681
|
|
669
682
|
# Register vectorized UDF for batch inference
|
670
683
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
671
|
-
safe_id=self.
|
684
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
672
685
|
|
673
686
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
674
687
|
# will try to pickle all of self which fails.
|
@@ -760,7 +773,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
760
773
|
return transformed_pandas_df.to_dict("records")
|
761
774
|
|
762
775
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
763
|
-
safe_id=self.
|
776
|
+
safe_id=self._get_rand_id()
|
764
777
|
)
|
765
778
|
|
766
779
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -927,11 +940,18 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
927
940
|
Transformed dataset.
|
928
941
|
"""
|
929
942
|
if isinstance(dataset, DataFrame):
|
943
|
+
expected_type_inferred = "float"
|
944
|
+
# when it is classifier, infer the datatype from label columns
|
945
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
946
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
947
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
948
|
+
)
|
949
|
+
|
930
950
|
output_df = self._batch_inference(
|
931
951
|
dataset=dataset,
|
932
952
|
inference_method="predict",
|
933
953
|
expected_output_cols_list=self.output_cols,
|
934
|
-
expected_output_cols_type=
|
954
|
+
expected_output_cols_type=expected_type_inferred,
|
935
955
|
)
|
936
956
|
elif isinstance(dataset, pd.DataFrame):
|
937
957
|
output_df = self._sklearn_inference(
|
@@ -1002,10 +1022,10 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
1002
1022
|
|
1003
1023
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
1004
1024
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
1005
|
-
Returns
|
1025
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
1006
1026
|
"""
|
1007
1027
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
1008
|
-
return []
|
1028
|
+
return [output_cols_prefix]
|
1009
1029
|
|
1010
1030
|
classes = self._sklearn_object.classes_
|
1011
1031
|
if isinstance(classes, numpy.ndarray):
|
@@ -1230,7 +1250,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
1230
1250
|
cp.dump(self._sklearn_object, local_score_file)
|
1231
1251
|
|
1232
1252
|
# Create temp stage to run score.
|
1233
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1253
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1234
1254
|
session = dataset._session
|
1235
1255
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1236
1256
|
SqlResultValidator(
|
@@ -1244,8 +1264,9 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
1244
1264
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1245
1265
|
).validate()
|
1246
1266
|
|
1247
|
-
|
1248
|
-
|
1267
|
+
# Use posixpath to construct stage paths
|
1268
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1269
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1249
1270
|
statement_params = telemetry.get_function_usage_statement_params(
|
1250
1271
|
project=_PROJECT,
|
1251
1272
|
subproject=_SUBPROJECT,
|
@@ -1271,6 +1292,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
1271
1292
|
replace=True,
|
1272
1293
|
session=session,
|
1273
1294
|
statement_params=statement_params,
|
1295
|
+
anonymous=True
|
1274
1296
|
)
|
1275
1297
|
def score_wrapper_sproc(
|
1276
1298
|
session: Session,
|
@@ -1278,7 +1300,8 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
1278
1300
|
stage_score_file_name: str,
|
1279
1301
|
input_cols: List[str],
|
1280
1302
|
label_cols: List[str],
|
1281
|
-
sample_weight_col: Optional[str]
|
1303
|
+
sample_weight_col: Optional[str],
|
1304
|
+
statement_params: Dict[str, str]
|
1282
1305
|
) -> float:
|
1283
1306
|
import cloudpickle as cp
|
1284
1307
|
import numpy as np
|
@@ -1328,14 +1351,14 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
1328
1351
|
api_calls=[Session.call],
|
1329
1352
|
custom_tags=dict([("autogen", True)]),
|
1330
1353
|
)
|
1331
|
-
score =
|
1332
|
-
|
1354
|
+
score = score_wrapper_sproc(
|
1355
|
+
session,
|
1333
1356
|
query,
|
1334
1357
|
stage_score_file_name,
|
1335
1358
|
identifier.get_unescaped_names(self.input_cols),
|
1336
1359
|
identifier.get_unescaped_names(self.label_cols),
|
1337
1360
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1338
|
-
statement_params
|
1361
|
+
statement_params,
|
1339
1362
|
)
|
1340
1363
|
|
1341
1364
|
cleanup_temp_files([local_score_file_name])
|
@@ -1353,18 +1376,20 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
1353
1376
|
if self._sklearn_object._estimator_type == 'classifier':
|
1354
1377
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1355
1378
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1356
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1379
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1380
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1357
1381
|
# For regressor, the type of predict is float64
|
1358
1382
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1359
1383
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1360
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1361
|
-
|
1384
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1385
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1362
1386
|
for prob_func in PROB_FUNCTIONS:
|
1363
1387
|
if hasattr(self, prob_func):
|
1364
1388
|
output_cols_prefix: str = f"{prob_func}_"
|
1365
1389
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1366
1390
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1367
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1391
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1392
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1368
1393
|
|
1369
1394
|
@property
|
1370
1395
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|