snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -238,7 +240,6 @@ class BayesianRidge(BaseTransformer):
|
|
238
240
|
sample_weight_col: Optional[str] = None,
|
239
241
|
) -> None:
|
240
242
|
super().__init__()
|
241
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
242
243
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
243
244
|
|
244
245
|
self._deps = list(deps)
|
@@ -269,6 +270,15 @@ class BayesianRidge(BaseTransformer):
|
|
269
270
|
self.set_drop_input_cols(drop_input_cols)
|
270
271
|
self.set_sample_weight_col(sample_weight_col)
|
271
272
|
|
273
|
+
def _get_rand_id(self) -> str:
|
274
|
+
"""
|
275
|
+
Generate random id to be used in sproc and stage names.
|
276
|
+
|
277
|
+
Returns:
|
278
|
+
Random id string usable in sproc, table, and stage names.
|
279
|
+
"""
|
280
|
+
return str(uuid4()).replace("-", "_").upper()
|
281
|
+
|
272
282
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
273
283
|
"""
|
274
284
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -347,7 +357,7 @@ class BayesianRidge(BaseTransformer):
|
|
347
357
|
cp.dump(self._sklearn_object, local_transform_file)
|
348
358
|
|
349
359
|
# Create temp stage to run fit.
|
350
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
360
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
351
361
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
352
362
|
SqlResultValidator(
|
353
363
|
session=session,
|
@@ -360,11 +370,12 @@ class BayesianRidge(BaseTransformer):
|
|
360
370
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
361
371
|
).validate()
|
362
372
|
|
363
|
-
|
373
|
+
# Use posixpath to construct stage paths
|
374
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
375
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
364
376
|
local_result_file_name = get_temp_file_path()
|
365
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
366
377
|
|
367
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
378
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
368
379
|
statement_params = telemetry.get_function_usage_statement_params(
|
369
380
|
project=_PROJECT,
|
370
381
|
subproject=_SUBPROJECT,
|
@@ -390,6 +401,7 @@ class BayesianRidge(BaseTransformer):
|
|
390
401
|
replace=True,
|
391
402
|
session=session,
|
392
403
|
statement_params=statement_params,
|
404
|
+
anonymous=True
|
393
405
|
)
|
394
406
|
def fit_wrapper_sproc(
|
395
407
|
session: Session,
|
@@ -398,7 +410,8 @@ class BayesianRidge(BaseTransformer):
|
|
398
410
|
stage_result_file_name: str,
|
399
411
|
input_cols: List[str],
|
400
412
|
label_cols: List[str],
|
401
|
-
sample_weight_col: Optional[str]
|
413
|
+
sample_weight_col: Optional[str],
|
414
|
+
statement_params: Dict[str, str]
|
402
415
|
) -> str:
|
403
416
|
import cloudpickle as cp
|
404
417
|
import numpy as np
|
@@ -465,15 +478,15 @@ class BayesianRidge(BaseTransformer):
|
|
465
478
|
api_calls=[Session.call],
|
466
479
|
custom_tags=dict([("autogen", True)]),
|
467
480
|
)
|
468
|
-
sproc_export_file_name =
|
469
|
-
|
481
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
482
|
+
session,
|
470
483
|
query,
|
471
484
|
stage_transform_file_name,
|
472
485
|
stage_result_file_name,
|
473
486
|
identifier.get_unescaped_names(self.input_cols),
|
474
487
|
identifier.get_unescaped_names(self.label_cols),
|
475
488
|
identifier.get_unescaped_names(self.sample_weight_col),
|
476
|
-
statement_params
|
489
|
+
statement_params,
|
477
490
|
)
|
478
491
|
|
479
492
|
if "|" in sproc_export_file_name:
|
@@ -483,7 +496,7 @@ class BayesianRidge(BaseTransformer):
|
|
483
496
|
print("\n".join(fields[1:]))
|
484
497
|
|
485
498
|
session.file.get(
|
486
|
-
|
499
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
487
500
|
local_result_file_name,
|
488
501
|
statement_params=statement_params
|
489
502
|
)
|
@@ -529,7 +542,7 @@ class BayesianRidge(BaseTransformer):
|
|
529
542
|
|
530
543
|
# Register vectorized UDF for batch inference
|
531
544
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
532
|
-
safe_id=self.
|
545
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
533
546
|
|
534
547
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
535
548
|
# will try to pickle all of self which fails.
|
@@ -621,7 +634,7 @@ class BayesianRidge(BaseTransformer):
|
|
621
634
|
return transformed_pandas_df.to_dict("records")
|
622
635
|
|
623
636
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
624
|
-
safe_id=self.
|
637
|
+
safe_id=self._get_rand_id()
|
625
638
|
)
|
626
639
|
|
627
640
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -788,11 +801,18 @@ class BayesianRidge(BaseTransformer):
|
|
788
801
|
Transformed dataset.
|
789
802
|
"""
|
790
803
|
if isinstance(dataset, DataFrame):
|
804
|
+
expected_type_inferred = "float"
|
805
|
+
# when it is classifier, infer the datatype from label columns
|
806
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
807
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
808
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
809
|
+
)
|
810
|
+
|
791
811
|
output_df = self._batch_inference(
|
792
812
|
dataset=dataset,
|
793
813
|
inference_method="predict",
|
794
814
|
expected_output_cols_list=self.output_cols,
|
795
|
-
expected_output_cols_type=
|
815
|
+
expected_output_cols_type=expected_type_inferred,
|
796
816
|
)
|
797
817
|
elif isinstance(dataset, pd.DataFrame):
|
798
818
|
output_df = self._sklearn_inference(
|
@@ -863,10 +883,10 @@ class BayesianRidge(BaseTransformer):
|
|
863
883
|
|
864
884
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
865
885
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
866
|
-
Returns
|
886
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
867
887
|
"""
|
868
888
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
869
|
-
return []
|
889
|
+
return [output_cols_prefix]
|
870
890
|
|
871
891
|
classes = self._sklearn_object.classes_
|
872
892
|
if isinstance(classes, numpy.ndarray):
|
@@ -1091,7 +1111,7 @@ class BayesianRidge(BaseTransformer):
|
|
1091
1111
|
cp.dump(self._sklearn_object, local_score_file)
|
1092
1112
|
|
1093
1113
|
# Create temp stage to run score.
|
1094
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1114
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1095
1115
|
session = dataset._session
|
1096
1116
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1097
1117
|
SqlResultValidator(
|
@@ -1105,8 +1125,9 @@ class BayesianRidge(BaseTransformer):
|
|
1105
1125
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1106
1126
|
).validate()
|
1107
1127
|
|
1108
|
-
|
1109
|
-
|
1128
|
+
# Use posixpath to construct stage paths
|
1129
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1130
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1110
1131
|
statement_params = telemetry.get_function_usage_statement_params(
|
1111
1132
|
project=_PROJECT,
|
1112
1133
|
subproject=_SUBPROJECT,
|
@@ -1132,6 +1153,7 @@ class BayesianRidge(BaseTransformer):
|
|
1132
1153
|
replace=True,
|
1133
1154
|
session=session,
|
1134
1155
|
statement_params=statement_params,
|
1156
|
+
anonymous=True
|
1135
1157
|
)
|
1136
1158
|
def score_wrapper_sproc(
|
1137
1159
|
session: Session,
|
@@ -1139,7 +1161,8 @@ class BayesianRidge(BaseTransformer):
|
|
1139
1161
|
stage_score_file_name: str,
|
1140
1162
|
input_cols: List[str],
|
1141
1163
|
label_cols: List[str],
|
1142
|
-
sample_weight_col: Optional[str]
|
1164
|
+
sample_weight_col: Optional[str],
|
1165
|
+
statement_params: Dict[str, str]
|
1143
1166
|
) -> float:
|
1144
1167
|
import cloudpickle as cp
|
1145
1168
|
import numpy as np
|
@@ -1189,14 +1212,14 @@ class BayesianRidge(BaseTransformer):
|
|
1189
1212
|
api_calls=[Session.call],
|
1190
1213
|
custom_tags=dict([("autogen", True)]),
|
1191
1214
|
)
|
1192
|
-
score =
|
1193
|
-
|
1215
|
+
score = score_wrapper_sproc(
|
1216
|
+
session,
|
1194
1217
|
query,
|
1195
1218
|
stage_score_file_name,
|
1196
1219
|
identifier.get_unescaped_names(self.input_cols),
|
1197
1220
|
identifier.get_unescaped_names(self.label_cols),
|
1198
1221
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1199
|
-
statement_params
|
1222
|
+
statement_params,
|
1200
1223
|
)
|
1201
1224
|
|
1202
1225
|
cleanup_temp_files([local_score_file_name])
|
@@ -1214,18 +1237,20 @@ class BayesianRidge(BaseTransformer):
|
|
1214
1237
|
if self._sklearn_object._estimator_type == 'classifier':
|
1215
1238
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1216
1239
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1217
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1240
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1241
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1218
1242
|
# For regressor, the type of predict is float64
|
1219
1243
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1220
1244
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1221
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1222
|
-
|
1245
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1246
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1223
1247
|
for prob_func in PROB_FUNCTIONS:
|
1224
1248
|
if hasattr(self, prob_func):
|
1225
1249
|
output_cols_prefix: str = f"{prob_func}_"
|
1226
1250
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1227
1251
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1228
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1252
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1253
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1229
1254
|
|
1230
1255
|
@property
|
1231
1256
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -245,7 +247,6 @@ class ElasticNet(BaseTransformer):
|
|
245
247
|
sample_weight_col: Optional[str] = None,
|
246
248
|
) -> None:
|
247
249
|
super().__init__()
|
248
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
249
250
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
250
251
|
|
251
252
|
self._deps = list(deps)
|
@@ -275,6 +276,15 @@ class ElasticNet(BaseTransformer):
|
|
275
276
|
self.set_drop_input_cols(drop_input_cols)
|
276
277
|
self.set_sample_weight_col(sample_weight_col)
|
277
278
|
|
279
|
+
def _get_rand_id(self) -> str:
|
280
|
+
"""
|
281
|
+
Generate random id to be used in sproc and stage names.
|
282
|
+
|
283
|
+
Returns:
|
284
|
+
Random id string usable in sproc, table, and stage names.
|
285
|
+
"""
|
286
|
+
return str(uuid4()).replace("-", "_").upper()
|
287
|
+
|
278
288
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
279
289
|
"""
|
280
290
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -353,7 +363,7 @@ class ElasticNet(BaseTransformer):
|
|
353
363
|
cp.dump(self._sklearn_object, local_transform_file)
|
354
364
|
|
355
365
|
# Create temp stage to run fit.
|
356
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
366
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
357
367
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
358
368
|
SqlResultValidator(
|
359
369
|
session=session,
|
@@ -366,11 +376,12 @@ class ElasticNet(BaseTransformer):
|
|
366
376
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
367
377
|
).validate()
|
368
378
|
|
369
|
-
|
379
|
+
# Use posixpath to construct stage paths
|
380
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
381
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
370
382
|
local_result_file_name = get_temp_file_path()
|
371
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
372
383
|
|
373
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
384
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
374
385
|
statement_params = telemetry.get_function_usage_statement_params(
|
375
386
|
project=_PROJECT,
|
376
387
|
subproject=_SUBPROJECT,
|
@@ -396,6 +407,7 @@ class ElasticNet(BaseTransformer):
|
|
396
407
|
replace=True,
|
397
408
|
session=session,
|
398
409
|
statement_params=statement_params,
|
410
|
+
anonymous=True
|
399
411
|
)
|
400
412
|
def fit_wrapper_sproc(
|
401
413
|
session: Session,
|
@@ -404,7 +416,8 @@ class ElasticNet(BaseTransformer):
|
|
404
416
|
stage_result_file_name: str,
|
405
417
|
input_cols: List[str],
|
406
418
|
label_cols: List[str],
|
407
|
-
sample_weight_col: Optional[str]
|
419
|
+
sample_weight_col: Optional[str],
|
420
|
+
statement_params: Dict[str, str]
|
408
421
|
) -> str:
|
409
422
|
import cloudpickle as cp
|
410
423
|
import numpy as np
|
@@ -471,15 +484,15 @@ class ElasticNet(BaseTransformer):
|
|
471
484
|
api_calls=[Session.call],
|
472
485
|
custom_tags=dict([("autogen", True)]),
|
473
486
|
)
|
474
|
-
sproc_export_file_name =
|
475
|
-
|
487
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
488
|
+
session,
|
476
489
|
query,
|
477
490
|
stage_transform_file_name,
|
478
491
|
stage_result_file_name,
|
479
492
|
identifier.get_unescaped_names(self.input_cols),
|
480
493
|
identifier.get_unescaped_names(self.label_cols),
|
481
494
|
identifier.get_unescaped_names(self.sample_weight_col),
|
482
|
-
statement_params
|
495
|
+
statement_params,
|
483
496
|
)
|
484
497
|
|
485
498
|
if "|" in sproc_export_file_name:
|
@@ -489,7 +502,7 @@ class ElasticNet(BaseTransformer):
|
|
489
502
|
print("\n".join(fields[1:]))
|
490
503
|
|
491
504
|
session.file.get(
|
492
|
-
|
505
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
493
506
|
local_result_file_name,
|
494
507
|
statement_params=statement_params
|
495
508
|
)
|
@@ -535,7 +548,7 @@ class ElasticNet(BaseTransformer):
|
|
535
548
|
|
536
549
|
# Register vectorized UDF for batch inference
|
537
550
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
538
|
-
safe_id=self.
|
551
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
539
552
|
|
540
553
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
541
554
|
# will try to pickle all of self which fails.
|
@@ -627,7 +640,7 @@ class ElasticNet(BaseTransformer):
|
|
627
640
|
return transformed_pandas_df.to_dict("records")
|
628
641
|
|
629
642
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
630
|
-
safe_id=self.
|
643
|
+
safe_id=self._get_rand_id()
|
631
644
|
)
|
632
645
|
|
633
646
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -794,11 +807,18 @@ class ElasticNet(BaseTransformer):
|
|
794
807
|
Transformed dataset.
|
795
808
|
"""
|
796
809
|
if isinstance(dataset, DataFrame):
|
810
|
+
expected_type_inferred = "float"
|
811
|
+
# when it is classifier, infer the datatype from label columns
|
812
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
813
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
814
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
815
|
+
)
|
816
|
+
|
797
817
|
output_df = self._batch_inference(
|
798
818
|
dataset=dataset,
|
799
819
|
inference_method="predict",
|
800
820
|
expected_output_cols_list=self.output_cols,
|
801
|
-
expected_output_cols_type=
|
821
|
+
expected_output_cols_type=expected_type_inferred,
|
802
822
|
)
|
803
823
|
elif isinstance(dataset, pd.DataFrame):
|
804
824
|
output_df = self._sklearn_inference(
|
@@ -869,10 +889,10 @@ class ElasticNet(BaseTransformer):
|
|
869
889
|
|
870
890
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
871
891
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
872
|
-
Returns
|
892
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
873
893
|
"""
|
874
894
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
875
|
-
return []
|
895
|
+
return [output_cols_prefix]
|
876
896
|
|
877
897
|
classes = self._sklearn_object.classes_
|
878
898
|
if isinstance(classes, numpy.ndarray):
|
@@ -1097,7 +1117,7 @@ class ElasticNet(BaseTransformer):
|
|
1097
1117
|
cp.dump(self._sklearn_object, local_score_file)
|
1098
1118
|
|
1099
1119
|
# Create temp stage to run score.
|
1100
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1120
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1101
1121
|
session = dataset._session
|
1102
1122
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1103
1123
|
SqlResultValidator(
|
@@ -1111,8 +1131,9 @@ class ElasticNet(BaseTransformer):
|
|
1111
1131
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1112
1132
|
).validate()
|
1113
1133
|
|
1114
|
-
|
1115
|
-
|
1134
|
+
# Use posixpath to construct stage paths
|
1135
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1136
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1116
1137
|
statement_params = telemetry.get_function_usage_statement_params(
|
1117
1138
|
project=_PROJECT,
|
1118
1139
|
subproject=_SUBPROJECT,
|
@@ -1138,6 +1159,7 @@ class ElasticNet(BaseTransformer):
|
|
1138
1159
|
replace=True,
|
1139
1160
|
session=session,
|
1140
1161
|
statement_params=statement_params,
|
1162
|
+
anonymous=True
|
1141
1163
|
)
|
1142
1164
|
def score_wrapper_sproc(
|
1143
1165
|
session: Session,
|
@@ -1145,7 +1167,8 @@ class ElasticNet(BaseTransformer):
|
|
1145
1167
|
stage_score_file_name: str,
|
1146
1168
|
input_cols: List[str],
|
1147
1169
|
label_cols: List[str],
|
1148
|
-
sample_weight_col: Optional[str]
|
1170
|
+
sample_weight_col: Optional[str],
|
1171
|
+
statement_params: Dict[str, str]
|
1149
1172
|
) -> float:
|
1150
1173
|
import cloudpickle as cp
|
1151
1174
|
import numpy as np
|
@@ -1195,14 +1218,14 @@ class ElasticNet(BaseTransformer):
|
|
1195
1218
|
api_calls=[Session.call],
|
1196
1219
|
custom_tags=dict([("autogen", True)]),
|
1197
1220
|
)
|
1198
|
-
score =
|
1199
|
-
|
1221
|
+
score = score_wrapper_sproc(
|
1222
|
+
session,
|
1200
1223
|
query,
|
1201
1224
|
stage_score_file_name,
|
1202
1225
|
identifier.get_unescaped_names(self.input_cols),
|
1203
1226
|
identifier.get_unescaped_names(self.label_cols),
|
1204
1227
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1205
|
-
statement_params
|
1228
|
+
statement_params,
|
1206
1229
|
)
|
1207
1230
|
|
1208
1231
|
cleanup_temp_files([local_score_file_name])
|
@@ -1220,18 +1243,20 @@ class ElasticNet(BaseTransformer):
|
|
1220
1243
|
if self._sklearn_object._estimator_type == 'classifier':
|
1221
1244
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1222
1245
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1223
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1246
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1247
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1224
1248
|
# For regressor, the type of predict is float64
|
1225
1249
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1226
1250
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1227
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1228
|
-
|
1251
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1252
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1229
1253
|
for prob_func in PROB_FUNCTIONS:
|
1230
1254
|
if hasattr(self, prob_func):
|
1231
1255
|
output_cols_prefix: str = f"{prob_func}_"
|
1232
1256
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1233
1257
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1234
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1258
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1259
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1235
1260
|
|
1236
1261
|
@property
|
1237
1262
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|