snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
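Most of the per-estimator changes under snowflake/ml/modeling/ follow one pattern, shown in detail for the two gaussian_process estimators below: the per-instance `self.id` attribute is removed, each temporary sproc, table, and stage name instead gets a fresh identifier from a new `_get_rand_id()` helper, stage paths are assembled with `posixpath`, and the fit/score wrapper sprocs are registered with `anonymous=True` and now receive `statement_params` explicitly. A minimal sketch of the id-and-stage-path pattern, using names taken from the diff (the local file path below is only a stand-in for `get_temp_file_path()`):

```python
import os
import posixpath
from uuid import uuid4


def _get_rand_id() -> str:
    """Generate a random id usable in sproc, table, and stage names."""
    return str(uuid4()).replace("-", "_").upper()


# A fresh id is generated per call instead of reusing a per-instance self.id,
# and stage paths are joined with posixpath so they always use "/".
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id())
local_transform_file_name = "/tmp/model.pkl"  # stand-in for get_temp_file_path()
stage_transform_file_name = posixpath.join(
    transform_stage_name, os.path.basename(local_transform_file_name)
)
print(stage_transform_file_name)  # e.g. SNOWML_TRANSFORM_<ID>/model.pkl
```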
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py (+51 -26)

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -270,7 +272,6 @@ class GaussianProcessClassifier(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -298,6 +299,15 @@ class GaussianProcessClassifier(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -376,7 +386,7 @@ class GaussianProcessClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -389,11 +399,12 @@ class GaussianProcessClassifier(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -419,6 +430,7 @@ class GaussianProcessClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -427,7 +439,8 @@ class GaussianProcessClassifier(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -494,15 +507,15 @@ class GaussianProcessClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -512,7 +525,7 @@ class GaussianProcessClassifier(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -558,7 +571,7 @@ class GaussianProcessClassifier(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -650,7 +663,7 @@ class GaussianProcessClassifier(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -817,11 +830,18 @@ class GaussianProcessClassifier(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -892,10 +912,10 @@ class GaussianProcessClassifier(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1124,7 +1144,7 @@ class GaussianProcessClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1138,8 +1158,9 @@ class GaussianProcessClassifier(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1165,6 +1186,7 @@ class GaussianProcessClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1172,7 +1194,8 @@ class GaussianProcessClassifier(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1222,14 +1245,14 @@ class GaussianProcessClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
        )

         cleanup_temp_files([local_score_file_name])
@@ -1247,18 +1270,20 @@ class GaussianProcessClassifier(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
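The regressor picks up the same changes below. One detail worth spelling out: building stage paths with `posixpath.join` instead of `os.path.join` keeps the separator a forward slash regardless of the client OS, whereas `os.path.join` emits backslashes on Windows, which do not match the slash-separated stage paths Snowflake expects. A small illustration (the stage and file names here are made up):

```python
import ntpath      # the os.path implementation used on Windows
import posixpath

stage = "SNOWML_TRANSFORM_ABC123"   # hypothetical stage name
file_name = "model.pkl"             # hypothetical file name

print(ntpath.join(stage, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl
print(posixpath.join(stage, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl
```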
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py (+51 -26)

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -255,7 +257,6 @@ class GaussianProcessRegressor(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -281,6 +282,15 @@ class GaussianProcessRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -359,7 +369,7 @@ class GaussianProcessRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -372,11 +382,12 @@ class GaussianProcessRegressor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -402,6 +413,7 @@ class GaussianProcessRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -410,7 +422,8 @@ class GaussianProcessRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -477,15 +490,15 @@ class GaussianProcessRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -495,7 +508,7 @@ class GaussianProcessRegressor(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -541,7 +554,7 @@ class GaussianProcessRegressor(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -633,7 +646,7 @@ class GaussianProcessRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -800,11 +813,18 @@ class GaussianProcessRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -875,10 +895,10 @@ class GaussianProcessRegressor(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1103,7 +1123,7 @@ class GaussianProcessRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1117,8 +1137,9 @@ class GaussianProcessRegressor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1144,6 +1165,7 @@ class GaussianProcessRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1151,7 +1173,8 @@ class GaussianProcessRegressor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1201,14 +1224,14 @@ class GaussianProcessRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1226,18 +1249,20 @@ class GaussianProcessRegressor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
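In both estimators, `predict()` on a Snowpark DataFrame now derives `expected_output_cols_type` from the recorded model signature: classifiers start from an empty string and take the Snowflake type of the first `predict` output via `convert_sp_to_sf_type`, while regressors simply default to `"float"`. A rough, self-contained sketch of that inference step, built only from names that appear in the diff; the feature specs below are made up for illustration, and the exact printed type name may differ:

```python
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

# Stand-in for the signature a fitted classifier would expose under "predict".
signature = ModelSignature(
    inputs=[FeatureSpec(dtype=DataType.DOUBLE, name="FEATURE_0")],
    outputs=[FeatureSpec(dtype=DataType.INT64, name="OUTPUT_TARGET")],
)

expected_type_inferred = ""  # classifiers start empty; regressors default to "float"
if expected_type_inferred == "":
    # Map the signature's Snowpark type to the Snowflake SQL type name that
    # _batch_inference() passes along as expected_output_cols_type.
    expected_type_inferred = convert_sp_to_sf_type(
        signature.outputs[0].as_snowpark_type()
    )
print(expected_type_inferred)  # e.g. "BIGINT"
```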