snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -338,7 +340,6 @@ class HistGradientBoostingClassifier(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -377,6 +378,15 @@ class HistGradientBoostingClassifier(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -455,7 +465,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -468,11 +478,12 @@ class HistGradientBoostingClassifier(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -498,6 +509,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -506,7 +518,8 @@ class HistGradientBoostingClassifier(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -573,15 +586,15 @@ class HistGradientBoostingClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -591,7 +604,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -637,7 +650,7 @@ class HistGradientBoostingClassifier(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -729,7 +742,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -896,11 +909,18 @@ class HistGradientBoostingClassifier(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -971,10 +991,10 @@ class HistGradientBoostingClassifier(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1205,7 +1225,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1219,8 +1239,9 @@ class HistGradientBoostingClassifier(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1246,6 +1267,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1253,7 +1275,8 @@ class HistGradientBoostingClassifier(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1303,14 +1326,14 @@ class HistGradientBoostingClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1328,18 +1351,20 @@ class HistGradientBoostingClassifier(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                        ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
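The pattern repeated across these hunks (and across the other modeling classes listed above) replaces the instance attribute self.id with a per-call _get_rand_id() helper and builds stage paths with posixpath.join instead of os.path.join. A minimal sketch of that pattern follows; local_transform_file_name is purely an illustrative stand-in for the temp file the real code writes, not a value from the package.

import os
import posixpath
from uuid import uuid4


def _get_rand_id() -> str:
    """Generate a random id usable in sproc, table, and stage names."""
    return str(uuid4()).replace("-", "_").upper()


# Stage names embed the random id; stage paths are joined with posixpath so
# they always use forward slashes, regardless of the client's operating system.
local_transform_file_name = "/tmp/model.pkl"  # illustrative placeholder path
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id())
stage_transform_file_name = posixpath.join(
    transform_stage_name, os.path.basename(local_transform_file_name)
)
print(stage_transform_file_name)  # e.g. SNOWML_TRANSFORM_<RANDOM_ID>/model.pkl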
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -328,7 +330,6 @@ class HistGradientBoostingRegressor(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -367,6 +368,15 @@ class HistGradientBoostingRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -445,7 +455,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -458,11 +468,12 @@ class HistGradientBoostingRegressor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -488,6 +499,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -496,7 +508,8 @@ class HistGradientBoostingRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -563,15 +576,15 @@ class HistGradientBoostingRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -581,7 +594,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -627,7 +640,7 @@ class HistGradientBoostingRegressor(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -719,7 +732,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -886,11 +899,18 @@ class HistGradientBoostingRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -961,10 +981,10 @@ class HistGradientBoostingRegressor(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1189,7 +1209,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1203,8 +1223,9 @@ class HistGradientBoostingRegressor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1230,6 +1251,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1237,7 +1259,8 @@ class HistGradientBoostingRegressor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1287,14 +1310,14 @@ class HistGradientBoostingRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1312,18 +1335,20 @@ class HistGradientBoostingRegressor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                        ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
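Both classes also change how the Snowpark predict() output column type is chosen: the regressor pins expected_type_inferred to "float", while the classifier starts from an empty string and falls back to the type recorded in its 'predict' model signature, converted with Snowpark's internal convert_sp_to_sf_type. The following is a hedged sketch of that selection logic only; the _FakeSignature/_FakeOutput classes and the default converter are stand-ins for illustration, not part of the package.

from dataclasses import dataclass
from typing import Callable, Dict, List


# Minimal stand-ins for the ModelSignature objects referenced in the diff;
# only the attributes the selection logic touches are modeled here.
@dataclass
class _FakeOutput:
    snowpark_type: str

    def as_snowpark_type(self) -> str:
        return self.snowpark_type


@dataclass
class _FakeSignature:
    outputs: List[_FakeOutput]


def infer_predict_output_type(
    signatures: Dict[str, _FakeSignature],
    is_regressor: bool,
    convert_sp_to_sf_type: Callable[[str], str] = lambda t: t.upper(),  # placeholder converter
) -> str:
    # Regressors hard-code "float"; classifiers start empty and fall back to
    # the type recorded in the 'predict' signature, converted to a SQL type name.
    expected_type_inferred = "float" if is_regressor else ""
    if expected_type_inferred == "" and "predict" in signatures:
        expected_type_inferred = convert_sp_to_sf_type(
            signatures["predict"].outputs[0].as_snowpark_type()
        )
    return expected_type_inferred


print(infer_predict_output_type({}, is_regressor=True))  # float
print(infer_predict_output_type(
    {"predict": _FakeSignature([_FakeOutput("StringType")])}, is_regressor=False
))  # STRINGTYPE (placeholder conversion)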