snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -250,7 +252,6 @@ class BaggingClassifier(BaseTransformer):
|
|
250
252
|
sample_weight_col: Optional[str] = None,
|
251
253
|
) -> None:
|
252
254
|
super().__init__()
|
253
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
254
255
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
255
256
|
deps = deps | _gather_dependencies(estimator)
|
256
257
|
deps = deps | _gather_dependencies(base_estimator)
|
@@ -283,6 +284,15 @@ class BaggingClassifier(BaseTransformer):
|
|
283
284
|
self.set_drop_input_cols(drop_input_cols)
|
284
285
|
self.set_sample_weight_col(sample_weight_col)
|
285
286
|
|
287
|
+
def _get_rand_id(self) -> str:
|
288
|
+
"""
|
289
|
+
Generate random id to be used in sproc and stage names.
|
290
|
+
|
291
|
+
Returns:
|
292
|
+
Random id string usable in sproc, table, and stage names.
|
293
|
+
"""
|
294
|
+
return str(uuid4()).replace("-", "_").upper()
|
295
|
+
|
286
296
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
287
297
|
"""
|
288
298
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -361,7 +371,7 @@ class BaggingClassifier(BaseTransformer):
|
|
361
371
|
cp.dump(self._sklearn_object, local_transform_file)
|
362
372
|
|
363
373
|
# Create temp stage to run fit.
|
364
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
374
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
365
375
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
366
376
|
SqlResultValidator(
|
367
377
|
session=session,
|
@@ -374,11 +384,12 @@ class BaggingClassifier(BaseTransformer):
|
|
374
384
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
375
385
|
).validate()
|
376
386
|
|
377
|
-
|
387
|
+
# Use posixpath to construct stage paths
|
388
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
389
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
378
390
|
local_result_file_name = get_temp_file_path()
|
379
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
380
391
|
|
381
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
392
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
382
393
|
statement_params = telemetry.get_function_usage_statement_params(
|
383
394
|
project=_PROJECT,
|
384
395
|
subproject=_SUBPROJECT,
|
@@ -404,6 +415,7 @@ class BaggingClassifier(BaseTransformer):
|
|
404
415
|
replace=True,
|
405
416
|
session=session,
|
406
417
|
statement_params=statement_params,
|
418
|
+
anonymous=True
|
407
419
|
)
|
408
420
|
def fit_wrapper_sproc(
|
409
421
|
session: Session,
|
@@ -412,7 +424,8 @@ class BaggingClassifier(BaseTransformer):
|
|
412
424
|
stage_result_file_name: str,
|
413
425
|
input_cols: List[str],
|
414
426
|
label_cols: List[str],
|
415
|
-
sample_weight_col: Optional[str]
|
427
|
+
sample_weight_col: Optional[str],
|
428
|
+
statement_params: Dict[str, str]
|
416
429
|
) -> str:
|
417
430
|
import cloudpickle as cp
|
418
431
|
import numpy as np
|
@@ -479,15 +492,15 @@ class BaggingClassifier(BaseTransformer):
|
|
479
492
|
api_calls=[Session.call],
|
480
493
|
custom_tags=dict([("autogen", True)]),
|
481
494
|
)
|
482
|
-
sproc_export_file_name =
|
483
|
-
|
495
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
496
|
+
session,
|
484
497
|
query,
|
485
498
|
stage_transform_file_name,
|
486
499
|
stage_result_file_name,
|
487
500
|
identifier.get_unescaped_names(self.input_cols),
|
488
501
|
identifier.get_unescaped_names(self.label_cols),
|
489
502
|
identifier.get_unescaped_names(self.sample_weight_col),
|
490
|
-
statement_params
|
503
|
+
statement_params,
|
491
504
|
)
|
492
505
|
|
493
506
|
if "|" in sproc_export_file_name:
|
@@ -497,7 +510,7 @@ class BaggingClassifier(BaseTransformer):
|
|
497
510
|
print("\n".join(fields[1:]))
|
498
511
|
|
499
512
|
session.file.get(
|
500
|
-
|
513
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
501
514
|
local_result_file_name,
|
502
515
|
statement_params=statement_params
|
503
516
|
)
|
@@ -543,7 +556,7 @@ class BaggingClassifier(BaseTransformer):
|
|
543
556
|
|
544
557
|
# Register vectorized UDF for batch inference
|
545
558
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
546
|
-
safe_id=self.
|
559
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
547
560
|
|
548
561
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
549
562
|
# will try to pickle all of self which fails.
|
@@ -635,7 +648,7 @@ class BaggingClassifier(BaseTransformer):
|
|
635
648
|
return transformed_pandas_df.to_dict("records")
|
636
649
|
|
637
650
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
638
|
-
safe_id=self.
|
651
|
+
safe_id=self._get_rand_id()
|
639
652
|
)
|
640
653
|
|
641
654
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -802,11 +815,18 @@ class BaggingClassifier(BaseTransformer):
|
|
802
815
|
Transformed dataset.
|
803
816
|
"""
|
804
817
|
if isinstance(dataset, DataFrame):
|
818
|
+
expected_type_inferred = ""
|
819
|
+
# when it is classifier, infer the datatype from label columns
|
820
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
821
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
822
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
823
|
+
)
|
824
|
+
|
805
825
|
output_df = self._batch_inference(
|
806
826
|
dataset=dataset,
|
807
827
|
inference_method="predict",
|
808
828
|
expected_output_cols_list=self.output_cols,
|
809
|
-
expected_output_cols_type=
|
829
|
+
expected_output_cols_type=expected_type_inferred,
|
810
830
|
)
|
811
831
|
elif isinstance(dataset, pd.DataFrame):
|
812
832
|
output_df = self._sklearn_inference(
|
@@ -877,10 +897,10 @@ class BaggingClassifier(BaseTransformer):
|
|
877
897
|
|
878
898
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
879
899
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
880
|
-
Returns
|
900
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
881
901
|
"""
|
882
902
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
883
|
-
return []
|
903
|
+
return [output_cols_prefix]
|
884
904
|
|
885
905
|
classes = self._sklearn_object.classes_
|
886
906
|
if isinstance(classes, numpy.ndarray):
|
@@ -1111,7 +1131,7 @@ class BaggingClassifier(BaseTransformer):
|
|
1111
1131
|
cp.dump(self._sklearn_object, local_score_file)
|
1112
1132
|
|
1113
1133
|
# Create temp stage to run score.
|
1114
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1134
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1115
1135
|
session = dataset._session
|
1116
1136
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1117
1137
|
SqlResultValidator(
|
@@ -1125,8 +1145,9 @@ class BaggingClassifier(BaseTransformer):
|
|
1125
1145
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1126
1146
|
).validate()
|
1127
1147
|
|
1128
|
-
|
1129
|
-
|
1148
|
+
# Use posixpath to construct stage paths
|
1149
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1150
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1130
1151
|
statement_params = telemetry.get_function_usage_statement_params(
|
1131
1152
|
project=_PROJECT,
|
1132
1153
|
subproject=_SUBPROJECT,
|
@@ -1152,6 +1173,7 @@ class BaggingClassifier(BaseTransformer):
|
|
1152
1173
|
replace=True,
|
1153
1174
|
session=session,
|
1154
1175
|
statement_params=statement_params,
|
1176
|
+
anonymous=True
|
1155
1177
|
)
|
1156
1178
|
def score_wrapper_sproc(
|
1157
1179
|
session: Session,
|
@@ -1159,7 +1181,8 @@ class BaggingClassifier(BaseTransformer):
|
|
1159
1181
|
stage_score_file_name: str,
|
1160
1182
|
input_cols: List[str],
|
1161
1183
|
label_cols: List[str],
|
1162
|
-
sample_weight_col: Optional[str]
|
1184
|
+
sample_weight_col: Optional[str],
|
1185
|
+
statement_params: Dict[str, str]
|
1163
1186
|
) -> float:
|
1164
1187
|
import cloudpickle as cp
|
1165
1188
|
import numpy as np
|
@@ -1209,14 +1232,14 @@ class BaggingClassifier(BaseTransformer):
|
|
1209
1232
|
api_calls=[Session.call],
|
1210
1233
|
custom_tags=dict([("autogen", True)]),
|
1211
1234
|
)
|
1212
|
-
score =
|
1213
|
-
|
1235
|
+
score = score_wrapper_sproc(
|
1236
|
+
session,
|
1214
1237
|
query,
|
1215
1238
|
stage_score_file_name,
|
1216
1239
|
identifier.get_unescaped_names(self.input_cols),
|
1217
1240
|
identifier.get_unescaped_names(self.label_cols),
|
1218
1241
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1219
|
-
statement_params
|
1242
|
+
statement_params,
|
1220
1243
|
)
|
1221
1244
|
|
1222
1245
|
cleanup_temp_files([local_score_file_name])
|
@@ -1234,18 +1257,20 @@ class BaggingClassifier(BaseTransformer):
|
|
1234
1257
|
if self._sklearn_object._estimator_type == 'classifier':
|
1235
1258
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1236
1259
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1237
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1260
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1261
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1238
1262
|
# For regressor, the type of predict is float64
|
1239
1263
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1240
1264
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1241
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1242
|
-
|
1265
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1266
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1243
1267
|
for prob_func in PROB_FUNCTIONS:
|
1244
1268
|
if hasattr(self, prob_func):
|
1245
1269
|
output_cols_prefix: str = f"{prob_func}_"
|
1246
1270
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1247
1271
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1248
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1272
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1273
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1249
1274
|
|
1250
1275
|
@property
|
1251
1276
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -250,7 +252,6 @@ class BaggingRegressor(BaseTransformer):
|
|
250
252
|
sample_weight_col: Optional[str] = None,
|
251
253
|
) -> None:
|
252
254
|
super().__init__()
|
253
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
254
255
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
255
256
|
deps = deps | _gather_dependencies(estimator)
|
256
257
|
deps = deps | _gather_dependencies(base_estimator)
|
@@ -283,6 +284,15 @@ class BaggingRegressor(BaseTransformer):
|
|
283
284
|
self.set_drop_input_cols(drop_input_cols)
|
284
285
|
self.set_sample_weight_col(sample_weight_col)
|
285
286
|
|
287
|
+
def _get_rand_id(self) -> str:
|
288
|
+
"""
|
289
|
+
Generate random id to be used in sproc and stage names.
|
290
|
+
|
291
|
+
Returns:
|
292
|
+
Random id string usable in sproc, table, and stage names.
|
293
|
+
"""
|
294
|
+
return str(uuid4()).replace("-", "_").upper()
|
295
|
+
|
286
296
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
287
297
|
"""
|
288
298
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -361,7 +371,7 @@ class BaggingRegressor(BaseTransformer):
|
|
361
371
|
cp.dump(self._sklearn_object, local_transform_file)
|
362
372
|
|
363
373
|
# Create temp stage to run fit.
|
364
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
374
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
365
375
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
366
376
|
SqlResultValidator(
|
367
377
|
session=session,
|
@@ -374,11 +384,12 @@ class BaggingRegressor(BaseTransformer):
|
|
374
384
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
375
385
|
).validate()
|
376
386
|
|
377
|
-
|
387
|
+
# Use posixpath to construct stage paths
|
388
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
389
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
378
390
|
local_result_file_name = get_temp_file_path()
|
379
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
380
391
|
|
381
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
392
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
382
393
|
statement_params = telemetry.get_function_usage_statement_params(
|
383
394
|
project=_PROJECT,
|
384
395
|
subproject=_SUBPROJECT,
|
@@ -404,6 +415,7 @@ class BaggingRegressor(BaseTransformer):
|
|
404
415
|
replace=True,
|
405
416
|
session=session,
|
406
417
|
statement_params=statement_params,
|
418
|
+
anonymous=True
|
407
419
|
)
|
408
420
|
def fit_wrapper_sproc(
|
409
421
|
session: Session,
|
@@ -412,7 +424,8 @@ class BaggingRegressor(BaseTransformer):
|
|
412
424
|
stage_result_file_name: str,
|
413
425
|
input_cols: List[str],
|
414
426
|
label_cols: List[str],
|
415
|
-
sample_weight_col: Optional[str]
|
427
|
+
sample_weight_col: Optional[str],
|
428
|
+
statement_params: Dict[str, str]
|
416
429
|
) -> str:
|
417
430
|
import cloudpickle as cp
|
418
431
|
import numpy as np
|
@@ -479,15 +492,15 @@ class BaggingRegressor(BaseTransformer):
|
|
479
492
|
api_calls=[Session.call],
|
480
493
|
custom_tags=dict([("autogen", True)]),
|
481
494
|
)
|
482
|
-
sproc_export_file_name =
|
483
|
-
|
495
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
496
|
+
session,
|
484
497
|
query,
|
485
498
|
stage_transform_file_name,
|
486
499
|
stage_result_file_name,
|
487
500
|
identifier.get_unescaped_names(self.input_cols),
|
488
501
|
identifier.get_unescaped_names(self.label_cols),
|
489
502
|
identifier.get_unescaped_names(self.sample_weight_col),
|
490
|
-
statement_params
|
503
|
+
statement_params,
|
491
504
|
)
|
492
505
|
|
493
506
|
if "|" in sproc_export_file_name:
|
@@ -497,7 +510,7 @@ class BaggingRegressor(BaseTransformer):
|
|
497
510
|
print("\n".join(fields[1:]))
|
498
511
|
|
499
512
|
session.file.get(
|
500
|
-
|
513
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
501
514
|
local_result_file_name,
|
502
515
|
statement_params=statement_params
|
503
516
|
)
|
@@ -543,7 +556,7 @@ class BaggingRegressor(BaseTransformer):
|
|
543
556
|
|
544
557
|
# Register vectorized UDF for batch inference
|
545
558
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
546
|
-
safe_id=self.
|
559
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
547
560
|
|
548
561
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
549
562
|
# will try to pickle all of self which fails.
|
@@ -635,7 +648,7 @@ class BaggingRegressor(BaseTransformer):
|
|
635
648
|
return transformed_pandas_df.to_dict("records")
|
636
649
|
|
637
650
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
638
|
-
safe_id=self.
|
651
|
+
safe_id=self._get_rand_id()
|
639
652
|
)
|
640
653
|
|
641
654
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -802,11 +815,18 @@ class BaggingRegressor(BaseTransformer):
|
|
802
815
|
Transformed dataset.
|
803
816
|
"""
|
804
817
|
if isinstance(dataset, DataFrame):
|
818
|
+
expected_type_inferred = "float"
|
819
|
+
# when it is classifier, infer the datatype from label columns
|
820
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
821
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
822
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
823
|
+
)
|
824
|
+
|
805
825
|
output_df = self._batch_inference(
|
806
826
|
dataset=dataset,
|
807
827
|
inference_method="predict",
|
808
828
|
expected_output_cols_list=self.output_cols,
|
809
|
-
expected_output_cols_type=
|
829
|
+
expected_output_cols_type=expected_type_inferred,
|
810
830
|
)
|
811
831
|
elif isinstance(dataset, pd.DataFrame):
|
812
832
|
output_df = self._sklearn_inference(
|
@@ -877,10 +897,10 @@ class BaggingRegressor(BaseTransformer):
|
|
877
897
|
|
878
898
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
879
899
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
880
|
-
Returns
|
900
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
881
901
|
"""
|
882
902
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
883
|
-
return []
|
903
|
+
return [output_cols_prefix]
|
884
904
|
|
885
905
|
classes = self._sklearn_object.classes_
|
886
906
|
if isinstance(classes, numpy.ndarray):
|
@@ -1105,7 +1125,7 @@ class BaggingRegressor(BaseTransformer):
|
|
1105
1125
|
cp.dump(self._sklearn_object, local_score_file)
|
1106
1126
|
|
1107
1127
|
# Create temp stage to run score.
|
1108
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1128
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1109
1129
|
session = dataset._session
|
1110
1130
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1111
1131
|
SqlResultValidator(
|
@@ -1119,8 +1139,9 @@ class BaggingRegressor(BaseTransformer):
|
|
1119
1139
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1120
1140
|
).validate()
|
1121
1141
|
|
1122
|
-
|
1123
|
-
|
1142
|
+
# Use posixpath to construct stage paths
|
1143
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1144
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1124
1145
|
statement_params = telemetry.get_function_usage_statement_params(
|
1125
1146
|
project=_PROJECT,
|
1126
1147
|
subproject=_SUBPROJECT,
|
@@ -1146,6 +1167,7 @@ class BaggingRegressor(BaseTransformer):
|
|
1146
1167
|
replace=True,
|
1147
1168
|
session=session,
|
1148
1169
|
statement_params=statement_params,
|
1170
|
+
anonymous=True
|
1149
1171
|
)
|
1150
1172
|
def score_wrapper_sproc(
|
1151
1173
|
session: Session,
|
@@ -1153,7 +1175,8 @@ class BaggingRegressor(BaseTransformer):
|
|
1153
1175
|
stage_score_file_name: str,
|
1154
1176
|
input_cols: List[str],
|
1155
1177
|
label_cols: List[str],
|
1156
|
-
sample_weight_col: Optional[str]
|
1178
|
+
sample_weight_col: Optional[str],
|
1179
|
+
statement_params: Dict[str, str]
|
1157
1180
|
) -> float:
|
1158
1181
|
import cloudpickle as cp
|
1159
1182
|
import numpy as np
|
@@ -1203,14 +1226,14 @@ class BaggingRegressor(BaseTransformer):
|
|
1203
1226
|
api_calls=[Session.call],
|
1204
1227
|
custom_tags=dict([("autogen", True)]),
|
1205
1228
|
)
|
1206
|
-
score =
|
1207
|
-
|
1229
|
+
score = score_wrapper_sproc(
|
1230
|
+
session,
|
1208
1231
|
query,
|
1209
1232
|
stage_score_file_name,
|
1210
1233
|
identifier.get_unescaped_names(self.input_cols),
|
1211
1234
|
identifier.get_unescaped_names(self.label_cols),
|
1212
1235
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1213
|
-
statement_params
|
1236
|
+
statement_params,
|
1214
1237
|
)
|
1215
1238
|
|
1216
1239
|
cleanup_temp_files([local_score_file_name])
|
@@ -1228,18 +1251,20 @@ class BaggingRegressor(BaseTransformer):
|
|
1228
1251
|
if self._sklearn_object._estimator_type == 'classifier':
|
1229
1252
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1230
1253
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1231
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1254
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1255
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1232
1256
|
# For regressor, the type of predict is float64
|
1233
1257
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1234
1258
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1235
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1236
|
-
|
1259
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1260
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1237
1261
|
for prob_func in PROB_FUNCTIONS:
|
1238
1262
|
if hasattr(self, prob_func):
|
1239
1263
|
output_cols_prefix: str = f"{prob_func}_"
|
1240
1264
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1241
1265
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1242
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1266
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1267
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1243
1268
|
|
1244
1269
|
@property
|
1245
1270
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|