snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -282,7 +284,6 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
282
284
|
sample_weight_col: Optional[str] = None,
|
283
285
|
) -> None:
|
284
286
|
super().__init__()
|
285
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
286
287
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
287
288
|
|
288
289
|
self._deps = list(deps)
|
@@ -312,6 +313,15 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
312
313
|
self.set_drop_input_cols(drop_input_cols)
|
313
314
|
self.set_sample_weight_col(sample_weight_col)
|
314
315
|
|
316
|
+
def _get_rand_id(self) -> str:
|
317
|
+
"""
|
318
|
+
Generate random id to be used in sproc and stage names.
|
319
|
+
|
320
|
+
Returns:
|
321
|
+
Random id string usable in sproc, table, and stage names.
|
322
|
+
"""
|
323
|
+
return str(uuid4()).replace("-", "_").upper()
|
324
|
+
|
315
325
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
316
326
|
"""
|
317
327
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -390,7 +400,7 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
390
400
|
cp.dump(self._sklearn_object, local_transform_file)
|
391
401
|
|
392
402
|
# Create temp stage to run fit.
|
393
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
403
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
394
404
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
395
405
|
SqlResultValidator(
|
396
406
|
session=session,
|
@@ -403,11 +413,12 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
403
413
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
404
414
|
).validate()
|
405
415
|
|
406
|
-
|
416
|
+
# Use posixpath to construct stage paths
|
417
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
418
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
407
419
|
local_result_file_name = get_temp_file_path()
|
408
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
409
420
|
|
410
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
421
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
411
422
|
statement_params = telemetry.get_function_usage_statement_params(
|
412
423
|
project=_PROJECT,
|
413
424
|
subproject=_SUBPROJECT,
|
@@ -433,6 +444,7 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
433
444
|
replace=True,
|
434
445
|
session=session,
|
435
446
|
statement_params=statement_params,
|
447
|
+
anonymous=True
|
436
448
|
)
|
437
449
|
def fit_wrapper_sproc(
|
438
450
|
session: Session,
|
@@ -441,7 +453,8 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
441
453
|
stage_result_file_name: str,
|
442
454
|
input_cols: List[str],
|
443
455
|
label_cols: List[str],
|
444
|
-
sample_weight_col: Optional[str]
|
456
|
+
sample_weight_col: Optional[str],
|
457
|
+
statement_params: Dict[str, str]
|
445
458
|
) -> str:
|
446
459
|
import cloudpickle as cp
|
447
460
|
import numpy as np
|
@@ -508,15 +521,15 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
508
521
|
api_calls=[Session.call],
|
509
522
|
custom_tags=dict([("autogen", True)]),
|
510
523
|
)
|
511
|
-
sproc_export_file_name =
|
512
|
-
|
524
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
525
|
+
session,
|
513
526
|
query,
|
514
527
|
stage_transform_file_name,
|
515
528
|
stage_result_file_name,
|
516
529
|
identifier.get_unescaped_names(self.input_cols),
|
517
530
|
identifier.get_unescaped_names(self.label_cols),
|
518
531
|
identifier.get_unescaped_names(self.sample_weight_col),
|
519
|
-
statement_params
|
532
|
+
statement_params,
|
520
533
|
)
|
521
534
|
|
522
535
|
if "|" in sproc_export_file_name:
|
@@ -526,7 +539,7 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
526
539
|
print("\n".join(fields[1:]))
|
527
540
|
|
528
541
|
session.file.get(
|
529
|
-
|
542
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
530
543
|
local_result_file_name,
|
531
544
|
statement_params=statement_params
|
532
545
|
)
|
@@ -572,7 +585,7 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
572
585
|
|
573
586
|
# Register vectorized UDF for batch inference
|
574
587
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
575
|
-
safe_id=self.id, method=inference_method)
|
588
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
576
589
|
|
577
590
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
578
591
|
# will try to pickle all of self which fails.
|
@@ -664,7 +677,7 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
664
677
|
return transformed_pandas_df.to_dict("records")
|
665
678
|
|
666
679
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
667
|
-
safe_id=self.id
|
680
|
+
safe_id=self._get_rand_id()
|
668
681
|
)
|
669
682
|
|
670
683
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -831,11 +844,18 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
831
844
|
Transformed dataset.
|
832
845
|
"""
|
833
846
|
if isinstance(dataset, DataFrame):
|
847
|
+
expected_type_inferred = "float"
|
848
|
+
# when it is classifier, infer the datatype from label columns
|
849
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
850
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
851
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
852
|
+
)
|
853
|
+
|
834
854
|
output_df = self._batch_inference(
|
835
855
|
dataset=dataset,
|
836
856
|
inference_method="predict",
|
837
857
|
expected_output_cols_list=self.output_cols,
|
838
|
-
expected_output_cols_type=
|
858
|
+
expected_output_cols_type=expected_type_inferred,
|
839
859
|
)
|
840
860
|
elif isinstance(dataset, pd.DataFrame):
|
841
861
|
output_df = self._sklearn_inference(
|
@@ -906,10 +926,10 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
906
926
|
|
907
927
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
908
928
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
909
|
-
Returns
|
929
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
910
930
|
"""
|
911
931
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
912
|
-
return []
|
932
|
+
return [output_cols_prefix]
|
913
933
|
|
914
934
|
classes = self._sklearn_object.classes_
|
915
935
|
if isinstance(classes, numpy.ndarray):
|
@@ -1134,7 +1154,7 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
1134
1154
|
cp.dump(self._sklearn_object, local_score_file)
|
1135
1155
|
|
1136
1156
|
# Create temp stage to run score.
|
1137
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1157
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1138
1158
|
session = dataset._session
|
1139
1159
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1140
1160
|
SqlResultValidator(
|
@@ -1148,8 +1168,9 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
1148
1168
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1149
1169
|
).validate()
|
1150
1170
|
|
1151
|
-
|
1152
|
-
|
1171
|
+
# Use posixpath to construct stage paths
|
1172
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1173
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1153
1174
|
statement_params = telemetry.get_function_usage_statement_params(
|
1154
1175
|
project=_PROJECT,
|
1155
1176
|
subproject=_SUBPROJECT,
|
@@ -1175,6 +1196,7 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
1175
1196
|
replace=True,
|
1176
1197
|
session=session,
|
1177
1198
|
statement_params=statement_params,
|
1199
|
+
anonymous=True
|
1178
1200
|
)
|
1179
1201
|
def score_wrapper_sproc(
|
1180
1202
|
session: Session,
|
@@ -1182,7 +1204,8 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
1182
1204
|
stage_score_file_name: str,
|
1183
1205
|
input_cols: List[str],
|
1184
1206
|
label_cols: List[str],
|
1185
|
-
sample_weight_col: Optional[str]
|
1207
|
+
sample_weight_col: Optional[str],
|
1208
|
+
statement_params: Dict[str, str]
|
1186
1209
|
) -> float:
|
1187
1210
|
import cloudpickle as cp
|
1188
1211
|
import numpy as np
|
@@ -1232,14 +1255,14 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
1232
1255
|
api_calls=[Session.call],
|
1233
1256
|
custom_tags=dict([("autogen", True)]),
|
1234
1257
|
)
|
1235
|
-
score =
|
1236
|
-
|
1258
|
+
score = score_wrapper_sproc(
|
1259
|
+
session,
|
1237
1260
|
query,
|
1238
1261
|
stage_score_file_name,
|
1239
1262
|
identifier.get_unescaped_names(self.input_cols),
|
1240
1263
|
identifier.get_unescaped_names(self.label_cols),
|
1241
1264
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1242
|
-
statement_params
|
1265
|
+
statement_params,
|
1243
1266
|
)
|
1244
1267
|
|
1245
1268
|
cleanup_temp_files([local_score_file_name])
|
@@ -1257,18 +1280,20 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
1257
1280
|
if self._sklearn_object._estimator_type == 'classifier':
|
1258
1281
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1259
1282
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1260
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1283
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1284
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1261
1285
|
# For regressor, the type of predict is float64
|
1262
1286
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1263
1287
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1264
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1265
|
-
|
1288
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1289
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1266
1290
|
for prob_func in PROB_FUNCTIONS:
|
1267
1291
|
if hasattr(self, prob_func):
|
1268
1292
|
output_cols_prefix: str = f"{prob_func}_"
|
1269
1293
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1270
1294
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1271
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1295
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1296
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1272
1297
|
|
1273
1298
|
@property
|
1274
1299
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -26,6 +27,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
26
27
|
from snowflake.snowpark import DataFrame, Session
|
27
28
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
28
29
|
from snowflake.snowpark.types import PandasSeries
|
30
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
29
31
|
|
30
32
|
from snowflake.ml.model.model_signature import (
|
31
33
|
DataType,
|
@@ -390,7 +392,6 @@ class XGBClassifier(BaseTransformer):
|
|
390
392
|
**kwargs,
|
391
393
|
) -> None:
|
392
394
|
super().__init__()
|
393
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
394
395
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
|
395
396
|
|
396
397
|
self._deps = list(deps)
|
@@ -412,6 +413,15 @@ class XGBClassifier(BaseTransformer):
|
|
412
413
|
self.set_drop_input_cols(drop_input_cols)
|
413
414
|
self.set_sample_weight_col(sample_weight_col)
|
414
415
|
|
416
|
+
def _get_rand_id(self) -> str:
|
417
|
+
"""
|
418
|
+
Generate random id to be used in sproc and stage names.
|
419
|
+
|
420
|
+
Returns:
|
421
|
+
Random id string usable in sproc, table, and stage names.
|
422
|
+
"""
|
423
|
+
return str(uuid4()).replace("-", "_").upper()
|
424
|
+
|
415
425
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
416
426
|
"""
|
417
427
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -490,7 +500,7 @@ class XGBClassifier(BaseTransformer):
|
|
490
500
|
cp.dump(self._sklearn_object, local_transform_file)
|
491
501
|
|
492
502
|
# Create temp stage to run fit.
|
493
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
503
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
494
504
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
495
505
|
SqlResultValidator(
|
496
506
|
session=session,
|
@@ -503,11 +513,12 @@ class XGBClassifier(BaseTransformer):
|
|
503
513
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
504
514
|
).validate()
|
505
515
|
|
506
|
-
|
516
|
+
# Use posixpath to construct stage paths
|
517
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
518
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
507
519
|
local_result_file_name = get_temp_file_path()
|
508
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
509
520
|
|
510
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
521
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
511
522
|
statement_params = telemetry.get_function_usage_statement_params(
|
512
523
|
project=_PROJECT,
|
513
524
|
subproject=_SUBPROJECT,
|
@@ -533,6 +544,7 @@ class XGBClassifier(BaseTransformer):
|
|
533
544
|
replace=True,
|
534
545
|
session=session,
|
535
546
|
statement_params=statement_params,
|
547
|
+
anonymous=True
|
536
548
|
)
|
537
549
|
def fit_wrapper_sproc(
|
538
550
|
session: Session,
|
@@ -541,7 +553,8 @@ class XGBClassifier(BaseTransformer):
|
|
541
553
|
stage_result_file_name: str,
|
542
554
|
input_cols: List[str],
|
543
555
|
label_cols: List[str],
|
544
|
-
sample_weight_col: Optional[str]
|
556
|
+
sample_weight_col: Optional[str],
|
557
|
+
statement_params: Dict[str, str]
|
545
558
|
) -> str:
|
546
559
|
import cloudpickle as cp
|
547
560
|
import numpy as np
|
@@ -608,15 +621,15 @@ class XGBClassifier(BaseTransformer):
|
|
608
621
|
api_calls=[Session.call],
|
609
622
|
custom_tags=dict([("autogen", True)]),
|
610
623
|
)
|
611
|
-
sproc_export_file_name =
|
612
|
-
|
624
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
625
|
+
session,
|
613
626
|
query,
|
614
627
|
stage_transform_file_name,
|
615
628
|
stage_result_file_name,
|
616
629
|
identifier.get_unescaped_names(self.input_cols),
|
617
630
|
identifier.get_unescaped_names(self.label_cols),
|
618
631
|
identifier.get_unescaped_names(self.sample_weight_col),
|
619
|
-
statement_params
|
632
|
+
statement_params,
|
620
633
|
)
|
621
634
|
|
622
635
|
if "|" in sproc_export_file_name:
|
@@ -626,7 +639,7 @@ class XGBClassifier(BaseTransformer):
|
|
626
639
|
print("\n".join(fields[1:]))
|
627
640
|
|
628
641
|
session.file.get(
|
629
|
-
|
642
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
630
643
|
local_result_file_name,
|
631
644
|
statement_params=statement_params
|
632
645
|
)
|
@@ -672,7 +685,7 @@ class XGBClassifier(BaseTransformer):
|
|
672
685
|
|
673
686
|
# Register vectorized UDF for batch inference
|
674
687
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
675
|
-
safe_id=self.id, method=inference_method)
|
688
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
676
689
|
|
677
690
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
678
691
|
# will try to pickle all of self which fails.
|
@@ -764,7 +777,7 @@ class XGBClassifier(BaseTransformer):
|
|
764
777
|
return transformed_pandas_df.to_dict("records")
|
765
778
|
|
766
779
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
767
|
-
safe_id=self.id
|
780
|
+
safe_id=self._get_rand_id()
|
768
781
|
)
|
769
782
|
|
770
783
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -931,11 +944,18 @@ class XGBClassifier(BaseTransformer):
|
|
931
944
|
Transformed dataset.
|
932
945
|
"""
|
933
946
|
if isinstance(dataset, DataFrame):
|
947
|
+
expected_type_inferred = ""
|
948
|
+
# when it is classifier, infer the datatype from label columns
|
949
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
950
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
951
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
952
|
+
)
|
953
|
+
|
934
954
|
output_df = self._batch_inference(
|
935
955
|
dataset=dataset,
|
936
956
|
inference_method="predict",
|
937
957
|
expected_output_cols_list=self.output_cols,
|
938
|
-
expected_output_cols_type=
|
958
|
+
expected_output_cols_type=expected_type_inferred,
|
939
959
|
)
|
940
960
|
elif isinstance(dataset, pd.DataFrame):
|
941
961
|
output_df = self._sklearn_inference(
|
@@ -1006,10 +1026,10 @@ class XGBClassifier(BaseTransformer):
|
|
1006
1026
|
|
1007
1027
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
1008
1028
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
1009
|
-
Returns
|
1029
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
1010
1030
|
"""
|
1011
1031
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
1012
|
-
return []
|
1032
|
+
return [output_cols_prefix]
|
1013
1033
|
|
1014
1034
|
classes = self._sklearn_object.classes_
|
1015
1035
|
if isinstance(classes, numpy.ndarray):
|
@@ -1238,7 +1258,7 @@ class XGBClassifier(BaseTransformer):
|
|
1238
1258
|
cp.dump(self._sklearn_object, local_score_file)
|
1239
1259
|
|
1240
1260
|
# Create temp stage to run score.
|
1241
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1261
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1242
1262
|
session = dataset._session
|
1243
1263
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1244
1264
|
SqlResultValidator(
|
@@ -1252,8 +1272,9 @@ class XGBClassifier(BaseTransformer):
|
|
1252
1272
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1253
1273
|
).validate()
|
1254
1274
|
|
1255
|
-
|
1256
|
-
|
1275
|
+
# Use posixpath to construct stage paths
|
1276
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1277
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1257
1278
|
statement_params = telemetry.get_function_usage_statement_params(
|
1258
1279
|
project=_PROJECT,
|
1259
1280
|
subproject=_SUBPROJECT,
|
@@ -1279,6 +1300,7 @@ class XGBClassifier(BaseTransformer):
|
|
1279
1300
|
replace=True,
|
1280
1301
|
session=session,
|
1281
1302
|
statement_params=statement_params,
|
1303
|
+
anonymous=True
|
1282
1304
|
)
|
1283
1305
|
def score_wrapper_sproc(
|
1284
1306
|
session: Session,
|
@@ -1286,7 +1308,8 @@ class XGBClassifier(BaseTransformer):
|
|
1286
1308
|
stage_score_file_name: str,
|
1287
1309
|
input_cols: List[str],
|
1288
1310
|
label_cols: List[str],
|
1289
|
-
sample_weight_col: Optional[str]
|
1311
|
+
sample_weight_col: Optional[str],
|
1312
|
+
statement_params: Dict[str, str]
|
1290
1313
|
) -> float:
|
1291
1314
|
import cloudpickle as cp
|
1292
1315
|
import numpy as np
|
@@ -1336,14 +1359,14 @@ class XGBClassifier(BaseTransformer):
|
|
1336
1359
|
api_calls=[Session.call],
|
1337
1360
|
custom_tags=dict([("autogen", True)]),
|
1338
1361
|
)
|
1339
|
-
score =
|
1340
|
-
|
1362
|
+
score = score_wrapper_sproc(
|
1363
|
+
session,
|
1341
1364
|
query,
|
1342
1365
|
stage_score_file_name,
|
1343
1366
|
identifier.get_unescaped_names(self.input_cols),
|
1344
1367
|
identifier.get_unescaped_names(self.label_cols),
|
1345
1368
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1346
|
-
statement_params
|
1369
|
+
statement_params,
|
1347
1370
|
)
|
1348
1371
|
|
1349
1372
|
cleanup_temp_files([local_score_file_name])
|
@@ -1361,18 +1384,20 @@ class XGBClassifier(BaseTransformer):
|
|
1361
1384
|
if self._sklearn_object._estimator_type == 'classifier':
|
1362
1385
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1363
1386
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1364
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1387
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1388
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1365
1389
|
# For regressor, the type of predict is float64
|
1366
1390
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1367
1391
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1368
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1369
|
-
|
1392
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1393
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1370
1394
|
for prob_func in PROB_FUNCTIONS:
|
1371
1395
|
if hasattr(self, prob_func):
|
1372
1396
|
output_cols_prefix: str = f"{prob_func}_"
|
1373
1397
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1374
1398
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1375
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1399
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1400
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1376
1401
|
|
1377
1402
|
@property
|
1378
1403
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|