snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -224,7 +226,6 @@ class AdaBoostClassifier(BaseTransformer):
|
|
224
226
|
sample_weight_col: Optional[str] = None,
|
225
227
|
) -> None:
|
226
228
|
super().__init__()
|
227
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
228
229
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
229
230
|
deps = deps | _gather_dependencies(estimator)
|
230
231
|
deps = deps | _gather_dependencies(base_estimator)
|
@@ -251,6 +252,15 @@ class AdaBoostClassifier(BaseTransformer):
|
|
251
252
|
self.set_drop_input_cols(drop_input_cols)
|
252
253
|
self.set_sample_weight_col(sample_weight_col)
|
253
254
|
|
255
|
+
def _get_rand_id(self) -> str:
|
256
|
+
"""
|
257
|
+
Generate random id to be used in sproc and stage names.
|
258
|
+
|
259
|
+
Returns:
|
260
|
+
Random id string usable in sproc, table, and stage names.
|
261
|
+
"""
|
262
|
+
return str(uuid4()).replace("-", "_").upper()
|
263
|
+
|
254
264
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
255
265
|
"""
|
256
266
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -329,7 +339,7 @@ class AdaBoostClassifier(BaseTransformer):
|
|
329
339
|
cp.dump(self._sklearn_object, local_transform_file)
|
330
340
|
|
331
341
|
# Create temp stage to run fit.
|
332
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
342
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
333
343
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
334
344
|
SqlResultValidator(
|
335
345
|
session=session,
|
@@ -342,11 +352,12 @@ class AdaBoostClassifier(BaseTransformer):
|
|
342
352
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
343
353
|
).validate()
|
344
354
|
|
345
|
-
|
355
|
+
# Use posixpath to construct stage paths
|
356
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
357
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
346
358
|
local_result_file_name = get_temp_file_path()
|
347
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
348
359
|
|
349
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
360
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
350
361
|
statement_params = telemetry.get_function_usage_statement_params(
|
351
362
|
project=_PROJECT,
|
352
363
|
subproject=_SUBPROJECT,
|
@@ -372,6 +383,7 @@ class AdaBoostClassifier(BaseTransformer):
|
|
372
383
|
replace=True,
|
373
384
|
session=session,
|
374
385
|
statement_params=statement_params,
|
386
|
+
anonymous=True
|
375
387
|
)
|
376
388
|
def fit_wrapper_sproc(
|
377
389
|
session: Session,
|
@@ -380,7 +392,8 @@ class AdaBoostClassifier(BaseTransformer):
|
|
380
392
|
stage_result_file_name: str,
|
381
393
|
input_cols: List[str],
|
382
394
|
label_cols: List[str],
|
383
|
-
sample_weight_col: Optional[str]
|
395
|
+
sample_weight_col: Optional[str],
|
396
|
+
statement_params: Dict[str, str]
|
384
397
|
) -> str:
|
385
398
|
import cloudpickle as cp
|
386
399
|
import numpy as np
|
@@ -447,15 +460,15 @@ class AdaBoostClassifier(BaseTransformer):
|
|
447
460
|
api_calls=[Session.call],
|
448
461
|
custom_tags=dict([("autogen", True)]),
|
449
462
|
)
|
450
|
-
sproc_export_file_name =
|
451
|
-
|
463
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
464
|
+
session,
|
452
465
|
query,
|
453
466
|
stage_transform_file_name,
|
454
467
|
stage_result_file_name,
|
455
468
|
identifier.get_unescaped_names(self.input_cols),
|
456
469
|
identifier.get_unescaped_names(self.label_cols),
|
457
470
|
identifier.get_unescaped_names(self.sample_weight_col),
|
458
|
-
statement_params
|
471
|
+
statement_params,
|
459
472
|
)
|
460
473
|
|
461
474
|
if "|" in sproc_export_file_name:
|
@@ -465,7 +478,7 @@ class AdaBoostClassifier(BaseTransformer):
|
|
465
478
|
print("\n".join(fields[1:]))
|
466
479
|
|
467
480
|
session.file.get(
|
468
|
-
|
481
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
469
482
|
local_result_file_name,
|
470
483
|
statement_params=statement_params
|
471
484
|
)
|
@@ -511,7 +524,7 @@ class AdaBoostClassifier(BaseTransformer):
|
|
511
524
|
|
512
525
|
# Register vectorized UDF for batch inference
|
513
526
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
514
|
-
safe_id=self.
|
527
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
515
528
|
|
516
529
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
517
530
|
# will try to pickle all of self which fails.
|
@@ -603,7 +616,7 @@ class AdaBoostClassifier(BaseTransformer):
|
|
603
616
|
return transformed_pandas_df.to_dict("records")
|
604
617
|
|
605
618
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
606
|
-
safe_id=self.
|
619
|
+
safe_id=self._get_rand_id()
|
607
620
|
)
|
608
621
|
|
609
622
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -770,11 +783,18 @@ class AdaBoostClassifier(BaseTransformer):
|
|
770
783
|
Transformed dataset.
|
771
784
|
"""
|
772
785
|
if isinstance(dataset, DataFrame):
|
786
|
+
expected_type_inferred = ""
|
787
|
+
# when it is classifier, infer the datatype from label columns
|
788
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
789
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
790
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
791
|
+
)
|
792
|
+
|
773
793
|
output_df = self._batch_inference(
|
774
794
|
dataset=dataset,
|
775
795
|
inference_method="predict",
|
776
796
|
expected_output_cols_list=self.output_cols,
|
777
|
-
expected_output_cols_type=
|
797
|
+
expected_output_cols_type=expected_type_inferred,
|
778
798
|
)
|
779
799
|
elif isinstance(dataset, pd.DataFrame):
|
780
800
|
output_df = self._sklearn_inference(
|
@@ -845,10 +865,10 @@ class AdaBoostClassifier(BaseTransformer):
|
|
845
865
|
|
846
866
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
847
867
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
848
|
-
Returns
|
868
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
849
869
|
"""
|
850
870
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
851
|
-
return []
|
871
|
+
return [output_cols_prefix]
|
852
872
|
|
853
873
|
classes = self._sklearn_object.classes_
|
854
874
|
if isinstance(classes, numpy.ndarray):
|
@@ -1079,7 +1099,7 @@ class AdaBoostClassifier(BaseTransformer):
|
|
1079
1099
|
cp.dump(self._sklearn_object, local_score_file)
|
1080
1100
|
|
1081
1101
|
# Create temp stage to run score.
|
1082
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1102
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1083
1103
|
session = dataset._session
|
1084
1104
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1085
1105
|
SqlResultValidator(
|
@@ -1093,8 +1113,9 @@ class AdaBoostClassifier(BaseTransformer):
|
|
1093
1113
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1094
1114
|
).validate()
|
1095
1115
|
|
1096
|
-
|
1097
|
-
|
1116
|
+
# Use posixpath to construct stage paths
|
1117
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1118
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1098
1119
|
statement_params = telemetry.get_function_usage_statement_params(
|
1099
1120
|
project=_PROJECT,
|
1100
1121
|
subproject=_SUBPROJECT,
|
@@ -1120,6 +1141,7 @@ class AdaBoostClassifier(BaseTransformer):
|
|
1120
1141
|
replace=True,
|
1121
1142
|
session=session,
|
1122
1143
|
statement_params=statement_params,
|
1144
|
+
anonymous=True
|
1123
1145
|
)
|
1124
1146
|
def score_wrapper_sproc(
|
1125
1147
|
session: Session,
|
@@ -1127,7 +1149,8 @@ class AdaBoostClassifier(BaseTransformer):
|
|
1127
1149
|
stage_score_file_name: str,
|
1128
1150
|
input_cols: List[str],
|
1129
1151
|
label_cols: List[str],
|
1130
|
-
sample_weight_col: Optional[str]
|
1152
|
+
sample_weight_col: Optional[str],
|
1153
|
+
statement_params: Dict[str, str]
|
1131
1154
|
) -> float:
|
1132
1155
|
import cloudpickle as cp
|
1133
1156
|
import numpy as np
|
@@ -1177,14 +1200,14 @@ class AdaBoostClassifier(BaseTransformer):
|
|
1177
1200
|
api_calls=[Session.call],
|
1178
1201
|
custom_tags=dict([("autogen", True)]),
|
1179
1202
|
)
|
1180
|
-
score =
|
1181
|
-
|
1203
|
+
score = score_wrapper_sproc(
|
1204
|
+
session,
|
1182
1205
|
query,
|
1183
1206
|
stage_score_file_name,
|
1184
1207
|
identifier.get_unescaped_names(self.input_cols),
|
1185
1208
|
identifier.get_unescaped_names(self.label_cols),
|
1186
1209
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1187
|
-
statement_params
|
1210
|
+
statement_params,
|
1188
1211
|
)
|
1189
1212
|
|
1190
1213
|
cleanup_temp_files([local_score_file_name])
|
@@ -1202,18 +1225,20 @@ class AdaBoostClassifier(BaseTransformer):
|
|
1202
1225
|
if self._sklearn_object._estimator_type == 'classifier':
|
1203
1226
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1204
1227
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1205
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1228
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1229
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1206
1230
|
# For regressor, the type of predict is float64
|
1207
1231
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1208
1232
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1209
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1210
|
-
|
1233
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1234
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1211
1235
|
for prob_func in PROB_FUNCTIONS:
|
1212
1236
|
if hasattr(self, prob_func):
|
1213
1237
|
output_cols_prefix: str = f"{prob_func}_"
|
1214
1238
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1215
1239
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1216
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1240
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1241
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1217
1242
|
|
1218
1243
|
@property
|
1219
1244
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -221,7 +223,6 @@ class AdaBoostRegressor(BaseTransformer):
|
|
221
223
|
sample_weight_col: Optional[str] = None,
|
222
224
|
) -> None:
|
223
225
|
super().__init__()
|
224
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
225
226
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
226
227
|
deps = deps | _gather_dependencies(estimator)
|
227
228
|
deps = deps | _gather_dependencies(base_estimator)
|
@@ -248,6 +249,15 @@ class AdaBoostRegressor(BaseTransformer):
|
|
248
249
|
self.set_drop_input_cols(drop_input_cols)
|
249
250
|
self.set_sample_weight_col(sample_weight_col)
|
250
251
|
|
252
|
+
def _get_rand_id(self) -> str:
|
253
|
+
"""
|
254
|
+
Generate random id to be used in sproc and stage names.
|
255
|
+
|
256
|
+
Returns:
|
257
|
+
Random id string usable in sproc, table, and stage names.
|
258
|
+
"""
|
259
|
+
return str(uuid4()).replace("-", "_").upper()
|
260
|
+
|
251
261
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
252
262
|
"""
|
253
263
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -326,7 +336,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
326
336
|
cp.dump(self._sklearn_object, local_transform_file)
|
327
337
|
|
328
338
|
# Create temp stage to run fit.
|
329
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
339
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
330
340
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
331
341
|
SqlResultValidator(
|
332
342
|
session=session,
|
@@ -339,11 +349,12 @@ class AdaBoostRegressor(BaseTransformer):
|
|
339
349
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
340
350
|
).validate()
|
341
351
|
|
342
|
-
|
352
|
+
# Use posixpath to construct stage paths
|
353
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
354
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
343
355
|
local_result_file_name = get_temp_file_path()
|
344
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
345
356
|
|
346
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
357
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
347
358
|
statement_params = telemetry.get_function_usage_statement_params(
|
348
359
|
project=_PROJECT,
|
349
360
|
subproject=_SUBPROJECT,
|
@@ -369,6 +380,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
369
380
|
replace=True,
|
370
381
|
session=session,
|
371
382
|
statement_params=statement_params,
|
383
|
+
anonymous=True
|
372
384
|
)
|
373
385
|
def fit_wrapper_sproc(
|
374
386
|
session: Session,
|
@@ -377,7 +389,8 @@ class AdaBoostRegressor(BaseTransformer):
|
|
377
389
|
stage_result_file_name: str,
|
378
390
|
input_cols: List[str],
|
379
391
|
label_cols: List[str],
|
380
|
-
sample_weight_col: Optional[str]
|
392
|
+
sample_weight_col: Optional[str],
|
393
|
+
statement_params: Dict[str, str]
|
381
394
|
) -> str:
|
382
395
|
import cloudpickle as cp
|
383
396
|
import numpy as np
|
@@ -444,15 +457,15 @@ class AdaBoostRegressor(BaseTransformer):
|
|
444
457
|
api_calls=[Session.call],
|
445
458
|
custom_tags=dict([("autogen", True)]),
|
446
459
|
)
|
447
|
-
sproc_export_file_name =
|
448
|
-
|
460
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
461
|
+
session,
|
449
462
|
query,
|
450
463
|
stage_transform_file_name,
|
451
464
|
stage_result_file_name,
|
452
465
|
identifier.get_unescaped_names(self.input_cols),
|
453
466
|
identifier.get_unescaped_names(self.label_cols),
|
454
467
|
identifier.get_unescaped_names(self.sample_weight_col),
|
455
|
-
statement_params
|
468
|
+
statement_params,
|
456
469
|
)
|
457
470
|
|
458
471
|
if "|" in sproc_export_file_name:
|
@@ -462,7 +475,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
462
475
|
print("\n".join(fields[1:]))
|
463
476
|
|
464
477
|
session.file.get(
|
465
|
-
|
478
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
466
479
|
local_result_file_name,
|
467
480
|
statement_params=statement_params
|
468
481
|
)
|
@@ -508,7 +521,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
508
521
|
|
509
522
|
# Register vectorized UDF for batch inference
|
510
523
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
511
|
-
safe_id=self.
|
524
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
512
525
|
|
513
526
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
514
527
|
# will try to pickle all of self which fails.
|
@@ -600,7 +613,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
600
613
|
return transformed_pandas_df.to_dict("records")
|
601
614
|
|
602
615
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
603
|
-
safe_id=self.
|
616
|
+
safe_id=self._get_rand_id()
|
604
617
|
)
|
605
618
|
|
606
619
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -767,11 +780,18 @@ class AdaBoostRegressor(BaseTransformer):
|
|
767
780
|
Transformed dataset.
|
768
781
|
"""
|
769
782
|
if isinstance(dataset, DataFrame):
|
783
|
+
expected_type_inferred = "float"
|
784
|
+
# when it is classifier, infer the datatype from label columns
|
785
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
786
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
787
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
788
|
+
)
|
789
|
+
|
770
790
|
output_df = self._batch_inference(
|
771
791
|
dataset=dataset,
|
772
792
|
inference_method="predict",
|
773
793
|
expected_output_cols_list=self.output_cols,
|
774
|
-
expected_output_cols_type=
|
794
|
+
expected_output_cols_type=expected_type_inferred,
|
775
795
|
)
|
776
796
|
elif isinstance(dataset, pd.DataFrame):
|
777
797
|
output_df = self._sklearn_inference(
|
@@ -842,10 +862,10 @@ class AdaBoostRegressor(BaseTransformer):
|
|
842
862
|
|
843
863
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
844
864
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
845
|
-
Returns
|
865
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
846
866
|
"""
|
847
867
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
848
|
-
return []
|
868
|
+
return [output_cols_prefix]
|
849
869
|
|
850
870
|
classes = self._sklearn_object.classes_
|
851
871
|
if isinstance(classes, numpy.ndarray):
|
@@ -1070,7 +1090,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
1070
1090
|
cp.dump(self._sklearn_object, local_score_file)
|
1071
1091
|
|
1072
1092
|
# Create temp stage to run score.
|
1073
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1093
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1074
1094
|
session = dataset._session
|
1075
1095
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1076
1096
|
SqlResultValidator(
|
@@ -1084,8 +1104,9 @@ class AdaBoostRegressor(BaseTransformer):
|
|
1084
1104
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1085
1105
|
).validate()
|
1086
1106
|
|
1087
|
-
|
1088
|
-
|
1107
|
+
# Use posixpath to construct stage paths
|
1108
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1109
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1089
1110
|
statement_params = telemetry.get_function_usage_statement_params(
|
1090
1111
|
project=_PROJECT,
|
1091
1112
|
subproject=_SUBPROJECT,
|
@@ -1111,6 +1132,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
1111
1132
|
replace=True,
|
1112
1133
|
session=session,
|
1113
1134
|
statement_params=statement_params,
|
1135
|
+
anonymous=True
|
1114
1136
|
)
|
1115
1137
|
def score_wrapper_sproc(
|
1116
1138
|
session: Session,
|
@@ -1118,7 +1140,8 @@ class AdaBoostRegressor(BaseTransformer):
|
|
1118
1140
|
stage_score_file_name: str,
|
1119
1141
|
input_cols: List[str],
|
1120
1142
|
label_cols: List[str],
|
1121
|
-
sample_weight_col: Optional[str]
|
1143
|
+
sample_weight_col: Optional[str],
|
1144
|
+
statement_params: Dict[str, str]
|
1122
1145
|
) -> float:
|
1123
1146
|
import cloudpickle as cp
|
1124
1147
|
import numpy as np
|
@@ -1168,14 +1191,14 @@ class AdaBoostRegressor(BaseTransformer):
|
|
1168
1191
|
api_calls=[Session.call],
|
1169
1192
|
custom_tags=dict([("autogen", True)]),
|
1170
1193
|
)
|
1171
|
-
score =
|
1172
|
-
|
1194
|
+
score = score_wrapper_sproc(
|
1195
|
+
session,
|
1173
1196
|
query,
|
1174
1197
|
stage_score_file_name,
|
1175
1198
|
identifier.get_unescaped_names(self.input_cols),
|
1176
1199
|
identifier.get_unescaped_names(self.label_cols),
|
1177
1200
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1178
|
-
statement_params
|
1201
|
+
statement_params,
|
1179
1202
|
)
|
1180
1203
|
|
1181
1204
|
cleanup_temp_files([local_score_file_name])
|
@@ -1193,18 +1216,20 @@ class AdaBoostRegressor(BaseTransformer):
|
|
1193
1216
|
if self._sklearn_object._estimator_type == 'classifier':
|
1194
1217
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1195
1218
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1196
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1219
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1220
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1197
1221
|
# For regressor, the type of predict is float64
|
1198
1222
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1199
1223
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1200
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1201
|
-
|
1224
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1225
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1202
1226
|
for prob_func in PROB_FUNCTIONS:
|
1203
1227
|
if hasattr(self, prob_func):
|
1204
1228
|
output_cols_prefix: str = f"{prob_func}_"
|
1205
1229
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1206
1230
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1207
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1231
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1232
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1208
1233
|
|
1209
1234
|
@property
|
1210
1235
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|