snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -237,7 +239,6 @@ class NuSVR(BaseTransformer):
|
|
237
239
|
sample_weight_col: Optional[str] = None,
|
238
240
|
) -> None:
|
239
241
|
super().__init__()
|
240
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
241
242
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
242
243
|
|
243
244
|
self._deps = list(deps)
|
@@ -267,6 +268,15 @@ class NuSVR(BaseTransformer):
|
|
267
268
|
self.set_drop_input_cols(drop_input_cols)
|
268
269
|
self.set_sample_weight_col(sample_weight_col)
|
269
270
|
|
271
|
+
def _get_rand_id(self) -> str:
|
272
|
+
"""
|
273
|
+
Generate random id to be used in sproc and stage names.
|
274
|
+
|
275
|
+
Returns:
|
276
|
+
Random id string usable in sproc, table, and stage names.
|
277
|
+
"""
|
278
|
+
return str(uuid4()).replace("-", "_").upper()
|
279
|
+
|
270
280
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
271
281
|
"""
|
272
282
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -345,7 +355,7 @@ class NuSVR(BaseTransformer):
|
|
345
355
|
cp.dump(self._sklearn_object, local_transform_file)
|
346
356
|
|
347
357
|
# Create temp stage to run fit.
|
348
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
358
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
349
359
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
350
360
|
SqlResultValidator(
|
351
361
|
session=session,
|
@@ -358,11 +368,12 @@ class NuSVR(BaseTransformer):
|
|
358
368
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
359
369
|
).validate()
|
360
370
|
|
361
|
-
|
371
|
+
# Use posixpath to construct stage paths
|
372
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
373
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
362
374
|
local_result_file_name = get_temp_file_path()
|
363
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
364
375
|
|
365
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
376
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
366
377
|
statement_params = telemetry.get_function_usage_statement_params(
|
367
378
|
project=_PROJECT,
|
368
379
|
subproject=_SUBPROJECT,
|
@@ -388,6 +399,7 @@ class NuSVR(BaseTransformer):
|
|
388
399
|
replace=True,
|
389
400
|
session=session,
|
390
401
|
statement_params=statement_params,
|
402
|
+
anonymous=True
|
391
403
|
)
|
392
404
|
def fit_wrapper_sproc(
|
393
405
|
session: Session,
|
@@ -396,7 +408,8 @@ class NuSVR(BaseTransformer):
|
|
396
408
|
stage_result_file_name: str,
|
397
409
|
input_cols: List[str],
|
398
410
|
label_cols: List[str],
|
399
|
-
sample_weight_col: Optional[str]
|
411
|
+
sample_weight_col: Optional[str],
|
412
|
+
statement_params: Dict[str, str]
|
400
413
|
) -> str:
|
401
414
|
import cloudpickle as cp
|
402
415
|
import numpy as np
|
@@ -463,15 +476,15 @@ class NuSVR(BaseTransformer):
|
|
463
476
|
api_calls=[Session.call],
|
464
477
|
custom_tags=dict([("autogen", True)]),
|
465
478
|
)
|
466
|
-
sproc_export_file_name =
|
467
|
-
|
479
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
480
|
+
session,
|
468
481
|
query,
|
469
482
|
stage_transform_file_name,
|
470
483
|
stage_result_file_name,
|
471
484
|
identifier.get_unescaped_names(self.input_cols),
|
472
485
|
identifier.get_unescaped_names(self.label_cols),
|
473
486
|
identifier.get_unescaped_names(self.sample_weight_col),
|
474
|
-
statement_params
|
487
|
+
statement_params,
|
475
488
|
)
|
476
489
|
|
477
490
|
if "|" in sproc_export_file_name:
|
@@ -481,7 +494,7 @@ class NuSVR(BaseTransformer):
|
|
481
494
|
print("\n".join(fields[1:]))
|
482
495
|
|
483
496
|
session.file.get(
|
484
|
-
|
497
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
485
498
|
local_result_file_name,
|
486
499
|
statement_params=statement_params
|
487
500
|
)
|
@@ -527,7 +540,7 @@ class NuSVR(BaseTransformer):
|
|
527
540
|
|
528
541
|
# Register vectorized UDF for batch inference
|
529
542
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
530
|
-
safe_id=self.
|
543
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
531
544
|
|
532
545
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
533
546
|
# will try to pickle all of self which fails.
|
@@ -619,7 +632,7 @@ class NuSVR(BaseTransformer):
|
|
619
632
|
return transformed_pandas_df.to_dict("records")
|
620
633
|
|
621
634
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
622
|
-
safe_id=self.
|
635
|
+
safe_id=self._get_rand_id()
|
623
636
|
)
|
624
637
|
|
625
638
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -786,11 +799,18 @@ class NuSVR(BaseTransformer):
|
|
786
799
|
Transformed dataset.
|
787
800
|
"""
|
788
801
|
if isinstance(dataset, DataFrame):
|
802
|
+
expected_type_inferred = "float"
|
803
|
+
# when it is classifier, infer the datatype from label columns
|
804
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
805
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
806
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
807
|
+
)
|
808
|
+
|
789
809
|
output_df = self._batch_inference(
|
790
810
|
dataset=dataset,
|
791
811
|
inference_method="predict",
|
792
812
|
expected_output_cols_list=self.output_cols,
|
793
|
-
expected_output_cols_type=
|
813
|
+
expected_output_cols_type=expected_type_inferred,
|
794
814
|
)
|
795
815
|
elif isinstance(dataset, pd.DataFrame):
|
796
816
|
output_df = self._sklearn_inference(
|
@@ -861,10 +881,10 @@ class NuSVR(BaseTransformer):
|
|
861
881
|
|
862
882
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
863
883
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
864
|
-
Returns
|
884
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
865
885
|
"""
|
866
886
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
867
|
-
return []
|
887
|
+
return [output_cols_prefix]
|
868
888
|
|
869
889
|
classes = self._sklearn_object.classes_
|
870
890
|
if isinstance(classes, numpy.ndarray):
|
@@ -1089,7 +1109,7 @@ class NuSVR(BaseTransformer):
|
|
1089
1109
|
cp.dump(self._sklearn_object, local_score_file)
|
1090
1110
|
|
1091
1111
|
# Create temp stage to run score.
|
1092
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1112
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1093
1113
|
session = dataset._session
|
1094
1114
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1095
1115
|
SqlResultValidator(
|
@@ -1103,8 +1123,9 @@ class NuSVR(BaseTransformer):
|
|
1103
1123
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1104
1124
|
).validate()
|
1105
1125
|
|
1106
|
-
|
1107
|
-
|
1126
|
+
# Use posixpath to construct stage paths
|
1127
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1128
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1108
1129
|
statement_params = telemetry.get_function_usage_statement_params(
|
1109
1130
|
project=_PROJECT,
|
1110
1131
|
subproject=_SUBPROJECT,
|
@@ -1130,6 +1151,7 @@ class NuSVR(BaseTransformer):
|
|
1130
1151
|
replace=True,
|
1131
1152
|
session=session,
|
1132
1153
|
statement_params=statement_params,
|
1154
|
+
anonymous=True
|
1133
1155
|
)
|
1134
1156
|
def score_wrapper_sproc(
|
1135
1157
|
session: Session,
|
@@ -1137,7 +1159,8 @@ class NuSVR(BaseTransformer):
|
|
1137
1159
|
stage_score_file_name: str,
|
1138
1160
|
input_cols: List[str],
|
1139
1161
|
label_cols: List[str],
|
1140
|
-
sample_weight_col: Optional[str]
|
1162
|
+
sample_weight_col: Optional[str],
|
1163
|
+
statement_params: Dict[str, str]
|
1141
1164
|
) -> float:
|
1142
1165
|
import cloudpickle as cp
|
1143
1166
|
import numpy as np
|
@@ -1187,14 +1210,14 @@ class NuSVR(BaseTransformer):
|
|
1187
1210
|
api_calls=[Session.call],
|
1188
1211
|
custom_tags=dict([("autogen", True)]),
|
1189
1212
|
)
|
1190
|
-
score =
|
1191
|
-
|
1213
|
+
score = score_wrapper_sproc(
|
1214
|
+
session,
|
1192
1215
|
query,
|
1193
1216
|
stage_score_file_name,
|
1194
1217
|
identifier.get_unescaped_names(self.input_cols),
|
1195
1218
|
identifier.get_unescaped_names(self.label_cols),
|
1196
1219
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1197
|
-
statement_params
|
1220
|
+
statement_params,
|
1198
1221
|
)
|
1199
1222
|
|
1200
1223
|
cleanup_temp_files([local_score_file_name])
|
@@ -1212,18 +1235,20 @@ class NuSVR(BaseTransformer):
|
|
1212
1235
|
if self._sklearn_object._estimator_type == 'classifier':
|
1213
1236
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1214
1237
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1215
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1238
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1239
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1216
1240
|
# For regressor, the type of predict is float64
|
1217
1241
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1218
1242
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1219
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1220
|
-
|
1243
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1244
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1221
1245
|
for prob_func in PROB_FUNCTIONS:
|
1222
1246
|
if hasattr(self, prob_func):
|
1223
1247
|
output_cols_prefix: str = f"{prob_func}_"
|
1224
1248
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1225
1249
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1226
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1250
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1251
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1227
1252
|
|
1228
1253
|
@property
|
1229
1254
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
snowflake/ml/modeling/svm/svc.py
CHANGED
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -275,7 +277,6 @@ class SVC(BaseTransformer):
|
|
275
277
|
sample_weight_col: Optional[str] = None,
|
276
278
|
) -> None:
|
277
279
|
super().__init__()
|
278
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
279
280
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
280
281
|
|
281
282
|
self._deps = list(deps)
|
@@ -309,6 +310,15 @@ class SVC(BaseTransformer):
|
|
309
310
|
self.set_drop_input_cols(drop_input_cols)
|
310
311
|
self.set_sample_weight_col(sample_weight_col)
|
311
312
|
|
313
|
+
def _get_rand_id(self) -> str:
|
314
|
+
"""
|
315
|
+
Generate random id to be used in sproc and stage names.
|
316
|
+
|
317
|
+
Returns:
|
318
|
+
Random id string usable in sproc, table, and stage names.
|
319
|
+
"""
|
320
|
+
return str(uuid4()).replace("-", "_").upper()
|
321
|
+
|
312
322
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
313
323
|
"""
|
314
324
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -387,7 +397,7 @@ class SVC(BaseTransformer):
|
|
387
397
|
cp.dump(self._sklearn_object, local_transform_file)
|
388
398
|
|
389
399
|
# Create temp stage to run fit.
|
390
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
400
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
391
401
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
392
402
|
SqlResultValidator(
|
393
403
|
session=session,
|
@@ -400,11 +410,12 @@ class SVC(BaseTransformer):
|
|
400
410
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
401
411
|
).validate()
|
402
412
|
|
403
|
-
|
413
|
+
# Use posixpath to construct stage paths
|
414
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
415
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
404
416
|
local_result_file_name = get_temp_file_path()
|
405
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
406
417
|
|
407
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
418
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
408
419
|
statement_params = telemetry.get_function_usage_statement_params(
|
409
420
|
project=_PROJECT,
|
410
421
|
subproject=_SUBPROJECT,
|
@@ -430,6 +441,7 @@ class SVC(BaseTransformer):
|
|
430
441
|
replace=True,
|
431
442
|
session=session,
|
432
443
|
statement_params=statement_params,
|
444
|
+
anonymous=True
|
433
445
|
)
|
434
446
|
def fit_wrapper_sproc(
|
435
447
|
session: Session,
|
@@ -438,7 +450,8 @@ class SVC(BaseTransformer):
|
|
438
450
|
stage_result_file_name: str,
|
439
451
|
input_cols: List[str],
|
440
452
|
label_cols: List[str],
|
441
|
-
sample_weight_col: Optional[str]
|
453
|
+
sample_weight_col: Optional[str],
|
454
|
+
statement_params: Dict[str, str]
|
442
455
|
) -> str:
|
443
456
|
import cloudpickle as cp
|
444
457
|
import numpy as np
|
@@ -505,15 +518,15 @@ class SVC(BaseTransformer):
|
|
505
518
|
api_calls=[Session.call],
|
506
519
|
custom_tags=dict([("autogen", True)]),
|
507
520
|
)
|
508
|
-
sproc_export_file_name =
|
509
|
-
|
521
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
522
|
+
session,
|
510
523
|
query,
|
511
524
|
stage_transform_file_name,
|
512
525
|
stage_result_file_name,
|
513
526
|
identifier.get_unescaped_names(self.input_cols),
|
514
527
|
identifier.get_unescaped_names(self.label_cols),
|
515
528
|
identifier.get_unescaped_names(self.sample_weight_col),
|
516
|
-
statement_params
|
529
|
+
statement_params,
|
517
530
|
)
|
518
531
|
|
519
532
|
if "|" in sproc_export_file_name:
|
@@ -523,7 +536,7 @@ class SVC(BaseTransformer):
|
|
523
536
|
print("\n".join(fields[1:]))
|
524
537
|
|
525
538
|
session.file.get(
|
526
|
-
|
539
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
527
540
|
local_result_file_name,
|
528
541
|
statement_params=statement_params
|
529
542
|
)
|
@@ -569,7 +582,7 @@ class SVC(BaseTransformer):
|
|
569
582
|
|
570
583
|
# Register vectorized UDF for batch inference
|
571
584
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
572
|
-
safe_id=self.
|
585
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
573
586
|
|
574
587
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
575
588
|
# will try to pickle all of self which fails.
|
@@ -661,7 +674,7 @@ class SVC(BaseTransformer):
|
|
661
674
|
return transformed_pandas_df.to_dict("records")
|
662
675
|
|
663
676
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
664
|
-
safe_id=self.
|
677
|
+
safe_id=self._get_rand_id()
|
665
678
|
)
|
666
679
|
|
667
680
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -828,11 +841,18 @@ class SVC(BaseTransformer):
|
|
828
841
|
Transformed dataset.
|
829
842
|
"""
|
830
843
|
if isinstance(dataset, DataFrame):
|
844
|
+
expected_type_inferred = ""
|
845
|
+
# when it is classifier, infer the datatype from label columns
|
846
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
847
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
848
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
849
|
+
)
|
850
|
+
|
831
851
|
output_df = self._batch_inference(
|
832
852
|
dataset=dataset,
|
833
853
|
inference_method="predict",
|
834
854
|
expected_output_cols_list=self.output_cols,
|
835
|
-
expected_output_cols_type=
|
855
|
+
expected_output_cols_type=expected_type_inferred,
|
836
856
|
)
|
837
857
|
elif isinstance(dataset, pd.DataFrame):
|
838
858
|
output_df = self._sklearn_inference(
|
@@ -903,10 +923,10 @@ class SVC(BaseTransformer):
|
|
903
923
|
|
904
924
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
905
925
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
906
|
-
Returns
|
926
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
907
927
|
"""
|
908
928
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
909
|
-
return []
|
929
|
+
return [output_cols_prefix]
|
910
930
|
|
911
931
|
classes = self._sklearn_object.classes_
|
912
932
|
if isinstance(classes, numpy.ndarray):
|
@@ -1137,7 +1157,7 @@ class SVC(BaseTransformer):
|
|
1137
1157
|
cp.dump(self._sklearn_object, local_score_file)
|
1138
1158
|
|
1139
1159
|
# Create temp stage to run score.
|
1140
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1160
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1141
1161
|
session = dataset._session
|
1142
1162
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1143
1163
|
SqlResultValidator(
|
@@ -1151,8 +1171,9 @@ class SVC(BaseTransformer):
|
|
1151
1171
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1152
1172
|
).validate()
|
1153
1173
|
|
1154
|
-
|
1155
|
-
|
1174
|
+
# Use posixpath to construct stage paths
|
1175
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1176
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1156
1177
|
statement_params = telemetry.get_function_usage_statement_params(
|
1157
1178
|
project=_PROJECT,
|
1158
1179
|
subproject=_SUBPROJECT,
|
@@ -1178,6 +1199,7 @@ class SVC(BaseTransformer):
|
|
1178
1199
|
replace=True,
|
1179
1200
|
session=session,
|
1180
1201
|
statement_params=statement_params,
|
1202
|
+
anonymous=True
|
1181
1203
|
)
|
1182
1204
|
def score_wrapper_sproc(
|
1183
1205
|
session: Session,
|
@@ -1185,7 +1207,8 @@ class SVC(BaseTransformer):
|
|
1185
1207
|
stage_score_file_name: str,
|
1186
1208
|
input_cols: List[str],
|
1187
1209
|
label_cols: List[str],
|
1188
|
-
sample_weight_col: Optional[str]
|
1210
|
+
sample_weight_col: Optional[str],
|
1211
|
+
statement_params: Dict[str, str]
|
1189
1212
|
) -> float:
|
1190
1213
|
import cloudpickle as cp
|
1191
1214
|
import numpy as np
|
@@ -1235,14 +1258,14 @@ class SVC(BaseTransformer):
|
|
1235
1258
|
api_calls=[Session.call],
|
1236
1259
|
custom_tags=dict([("autogen", True)]),
|
1237
1260
|
)
|
1238
|
-
score =
|
1239
|
-
|
1261
|
+
score = score_wrapper_sproc(
|
1262
|
+
session,
|
1240
1263
|
query,
|
1241
1264
|
stage_score_file_name,
|
1242
1265
|
identifier.get_unescaped_names(self.input_cols),
|
1243
1266
|
identifier.get_unescaped_names(self.label_cols),
|
1244
1267
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1245
|
-
statement_params
|
1268
|
+
statement_params,
|
1246
1269
|
)
|
1247
1270
|
|
1248
1271
|
cleanup_temp_files([local_score_file_name])
|
@@ -1260,18 +1283,20 @@ class SVC(BaseTransformer):
|
|
1260
1283
|
if self._sklearn_object._estimator_type == 'classifier':
|
1261
1284
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1262
1285
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1263
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1286
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1287
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1264
1288
|
# For regressor, the type of predict is float64
|
1265
1289
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1266
1290
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1267
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1268
|
-
|
1291
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1292
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1269
1293
|
for prob_func in PROB_FUNCTIONS:
|
1270
1294
|
if hasattr(self, prob_func):
|
1271
1295
|
output_cols_prefix: str = f"{prob_func}_"
|
1272
1296
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1273
1297
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1274
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1298
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1299
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1275
1300
|
|
1276
1301
|
@property
|
1277
1302
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|