snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -239,7 +241,6 @@ class LinearSVR(BaseTransformer):
|
|
239
241
|
sample_weight_col: Optional[str] = None,
|
240
242
|
) -> None:
|
241
243
|
super().__init__()
|
242
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
243
244
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
244
245
|
|
245
246
|
self._deps = list(deps)
|
@@ -268,6 +269,15 @@ class LinearSVR(BaseTransformer):
|
|
268
269
|
self.set_drop_input_cols(drop_input_cols)
|
269
270
|
self.set_sample_weight_col(sample_weight_col)
|
270
271
|
|
272
|
+
def _get_rand_id(self) -> str:
|
273
|
+
"""
|
274
|
+
Generate random id to be used in sproc and stage names.
|
275
|
+
|
276
|
+
Returns:
|
277
|
+
Random id string usable in sproc, table, and stage names.
|
278
|
+
"""
|
279
|
+
return str(uuid4()).replace("-", "_").upper()
|
280
|
+
|
271
281
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
272
282
|
"""
|
273
283
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -346,7 +356,7 @@ class LinearSVR(BaseTransformer):
|
|
346
356
|
cp.dump(self._sklearn_object, local_transform_file)
|
347
357
|
|
348
358
|
# Create temp stage to run fit.
|
349
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
359
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
350
360
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
351
361
|
SqlResultValidator(
|
352
362
|
session=session,
|
@@ -359,11 +369,12 @@ class LinearSVR(BaseTransformer):
|
|
359
369
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
360
370
|
).validate()
|
361
371
|
|
362
|
-
|
372
|
+
# Use posixpath to construct stage paths
|
373
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
374
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
363
375
|
local_result_file_name = get_temp_file_path()
|
364
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
365
376
|
|
366
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
377
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
367
378
|
statement_params = telemetry.get_function_usage_statement_params(
|
368
379
|
project=_PROJECT,
|
369
380
|
subproject=_SUBPROJECT,
|
@@ -389,6 +400,7 @@ class LinearSVR(BaseTransformer):
|
|
389
400
|
replace=True,
|
390
401
|
session=session,
|
391
402
|
statement_params=statement_params,
|
403
|
+
anonymous=True
|
392
404
|
)
|
393
405
|
def fit_wrapper_sproc(
|
394
406
|
session: Session,
|
@@ -397,7 +409,8 @@ class LinearSVR(BaseTransformer):
|
|
397
409
|
stage_result_file_name: str,
|
398
410
|
input_cols: List[str],
|
399
411
|
label_cols: List[str],
|
400
|
-
sample_weight_col: Optional[str]
|
412
|
+
sample_weight_col: Optional[str],
|
413
|
+
statement_params: Dict[str, str]
|
401
414
|
) -> str:
|
402
415
|
import cloudpickle as cp
|
403
416
|
import numpy as np
|
@@ -464,15 +477,15 @@ class LinearSVR(BaseTransformer):
|
|
464
477
|
api_calls=[Session.call],
|
465
478
|
custom_tags=dict([("autogen", True)]),
|
466
479
|
)
|
467
|
-
sproc_export_file_name =
|
468
|
-
|
480
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
481
|
+
session,
|
469
482
|
query,
|
470
483
|
stage_transform_file_name,
|
471
484
|
stage_result_file_name,
|
472
485
|
identifier.get_unescaped_names(self.input_cols),
|
473
486
|
identifier.get_unescaped_names(self.label_cols),
|
474
487
|
identifier.get_unescaped_names(self.sample_weight_col),
|
475
|
-
statement_params
|
488
|
+
statement_params,
|
476
489
|
)
|
477
490
|
|
478
491
|
if "|" in sproc_export_file_name:
|
@@ -482,7 +495,7 @@ class LinearSVR(BaseTransformer):
|
|
482
495
|
print("\n".join(fields[1:]))
|
483
496
|
|
484
497
|
session.file.get(
|
485
|
-
|
498
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
486
499
|
local_result_file_name,
|
487
500
|
statement_params=statement_params
|
488
501
|
)
|
@@ -528,7 +541,7 @@ class LinearSVR(BaseTransformer):
|
|
528
541
|
|
529
542
|
# Register vectorized UDF for batch inference
|
530
543
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
531
|
-
safe_id=self.
|
544
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
532
545
|
|
533
546
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
534
547
|
# will try to pickle all of self which fails.
|
@@ -620,7 +633,7 @@ class LinearSVR(BaseTransformer):
|
|
620
633
|
return transformed_pandas_df.to_dict("records")
|
621
634
|
|
622
635
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
623
|
-
safe_id=self.
|
636
|
+
safe_id=self._get_rand_id()
|
624
637
|
)
|
625
638
|
|
626
639
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -787,11 +800,18 @@ class LinearSVR(BaseTransformer):
|
|
787
800
|
Transformed dataset.
|
788
801
|
"""
|
789
802
|
if isinstance(dataset, DataFrame):
|
803
|
+
expected_type_inferred = "float"
|
804
|
+
# when it is classifier, infer the datatype from label columns
|
805
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
806
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
807
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
808
|
+
)
|
809
|
+
|
790
810
|
output_df = self._batch_inference(
|
791
811
|
dataset=dataset,
|
792
812
|
inference_method="predict",
|
793
813
|
expected_output_cols_list=self.output_cols,
|
794
|
-
expected_output_cols_type=
|
814
|
+
expected_output_cols_type=expected_type_inferred,
|
795
815
|
)
|
796
816
|
elif isinstance(dataset, pd.DataFrame):
|
797
817
|
output_df = self._sklearn_inference(
|
@@ -862,10 +882,10 @@ class LinearSVR(BaseTransformer):
|
|
862
882
|
|
863
883
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
864
884
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
865
|
-
Returns
|
885
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
866
886
|
"""
|
867
887
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
868
|
-
return []
|
888
|
+
return [output_cols_prefix]
|
869
889
|
|
870
890
|
classes = self._sklearn_object.classes_
|
871
891
|
if isinstance(classes, numpy.ndarray):
|
@@ -1090,7 +1110,7 @@ class LinearSVR(BaseTransformer):
|
|
1090
1110
|
cp.dump(self._sklearn_object, local_score_file)
|
1091
1111
|
|
1092
1112
|
# Create temp stage to run score.
|
1093
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1113
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1094
1114
|
session = dataset._session
|
1095
1115
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1096
1116
|
SqlResultValidator(
|
@@ -1104,8 +1124,9 @@ class LinearSVR(BaseTransformer):
|
|
1104
1124
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1105
1125
|
).validate()
|
1106
1126
|
|
1107
|
-
|
1108
|
-
|
1127
|
+
# Use posixpath to construct stage paths
|
1128
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1129
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1109
1130
|
statement_params = telemetry.get_function_usage_statement_params(
|
1110
1131
|
project=_PROJECT,
|
1111
1132
|
subproject=_SUBPROJECT,
|
@@ -1131,6 +1152,7 @@ class LinearSVR(BaseTransformer):
|
|
1131
1152
|
replace=True,
|
1132
1153
|
session=session,
|
1133
1154
|
statement_params=statement_params,
|
1155
|
+
anonymous=True
|
1134
1156
|
)
|
1135
1157
|
def score_wrapper_sproc(
|
1136
1158
|
session: Session,
|
@@ -1138,7 +1160,8 @@ class LinearSVR(BaseTransformer):
|
|
1138
1160
|
stage_score_file_name: str,
|
1139
1161
|
input_cols: List[str],
|
1140
1162
|
label_cols: List[str],
|
1141
|
-
sample_weight_col: Optional[str]
|
1163
|
+
sample_weight_col: Optional[str],
|
1164
|
+
statement_params: Dict[str, str]
|
1142
1165
|
) -> float:
|
1143
1166
|
import cloudpickle as cp
|
1144
1167
|
import numpy as np
|
@@ -1188,14 +1211,14 @@ class LinearSVR(BaseTransformer):
|
|
1188
1211
|
api_calls=[Session.call],
|
1189
1212
|
custom_tags=dict([("autogen", True)]),
|
1190
1213
|
)
|
1191
|
-
score =
|
1192
|
-
|
1214
|
+
score = score_wrapper_sproc(
|
1215
|
+
session,
|
1193
1216
|
query,
|
1194
1217
|
stage_score_file_name,
|
1195
1218
|
identifier.get_unescaped_names(self.input_cols),
|
1196
1219
|
identifier.get_unescaped_names(self.label_cols),
|
1197
1220
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1198
|
-
statement_params
|
1221
|
+
statement_params,
|
1199
1222
|
)
|
1200
1223
|
|
1201
1224
|
cleanup_temp_files([local_score_file_name])
|
@@ -1213,18 +1236,20 @@ class LinearSVR(BaseTransformer):
|
|
1213
1236
|
if self._sklearn_object._estimator_type == 'classifier':
|
1214
1237
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1215
1238
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1216
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1239
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1240
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1217
1241
|
# For regressor, the type of predict is float64
|
1218
1242
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1219
1243
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1220
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1221
|
-
|
1244
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1245
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1222
1246
|
for prob_func in PROB_FUNCTIONS:
|
1223
1247
|
if hasattr(self, prob_func):
|
1224
1248
|
output_cols_prefix: str = f"{prob_func}_"
|
1225
1249
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1226
1250
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1227
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1251
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1252
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1228
1253
|
|
1229
1254
|
@property
|
1230
1255
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -272,7 +274,6 @@ class NuSVC(BaseTransformer):
|
|
272
274
|
sample_weight_col: Optional[str] = None,
|
273
275
|
) -> None:
|
274
276
|
super().__init__()
|
275
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
276
277
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
277
278
|
|
278
279
|
self._deps = list(deps)
|
@@ -306,6 +307,15 @@ class NuSVC(BaseTransformer):
|
|
306
307
|
self.set_drop_input_cols(drop_input_cols)
|
307
308
|
self.set_sample_weight_col(sample_weight_col)
|
308
309
|
|
310
|
+
def _get_rand_id(self) -> str:
|
311
|
+
"""
|
312
|
+
Generate random id to be used in sproc and stage names.
|
313
|
+
|
314
|
+
Returns:
|
315
|
+
Random id string usable in sproc, table, and stage names.
|
316
|
+
"""
|
317
|
+
return str(uuid4()).replace("-", "_").upper()
|
318
|
+
|
309
319
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
310
320
|
"""
|
311
321
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -384,7 +394,7 @@ class NuSVC(BaseTransformer):
|
|
384
394
|
cp.dump(self._sklearn_object, local_transform_file)
|
385
395
|
|
386
396
|
# Create temp stage to run fit.
|
387
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
397
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
388
398
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
389
399
|
SqlResultValidator(
|
390
400
|
session=session,
|
@@ -397,11 +407,12 @@ class NuSVC(BaseTransformer):
|
|
397
407
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
398
408
|
).validate()
|
399
409
|
|
400
|
-
|
410
|
+
# Use posixpath to construct stage paths
|
411
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
412
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
401
413
|
local_result_file_name = get_temp_file_path()
|
402
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
403
414
|
|
404
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
415
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
405
416
|
statement_params = telemetry.get_function_usage_statement_params(
|
406
417
|
project=_PROJECT,
|
407
418
|
subproject=_SUBPROJECT,
|
@@ -427,6 +438,7 @@ class NuSVC(BaseTransformer):
|
|
427
438
|
replace=True,
|
428
439
|
session=session,
|
429
440
|
statement_params=statement_params,
|
441
|
+
anonymous=True
|
430
442
|
)
|
431
443
|
def fit_wrapper_sproc(
|
432
444
|
session: Session,
|
@@ -435,7 +447,8 @@ class NuSVC(BaseTransformer):
|
|
435
447
|
stage_result_file_name: str,
|
436
448
|
input_cols: List[str],
|
437
449
|
label_cols: List[str],
|
438
|
-
sample_weight_col: Optional[str]
|
450
|
+
sample_weight_col: Optional[str],
|
451
|
+
statement_params: Dict[str, str]
|
439
452
|
) -> str:
|
440
453
|
import cloudpickle as cp
|
441
454
|
import numpy as np
|
@@ -502,15 +515,15 @@ class NuSVC(BaseTransformer):
|
|
502
515
|
api_calls=[Session.call],
|
503
516
|
custom_tags=dict([("autogen", True)]),
|
504
517
|
)
|
505
|
-
sproc_export_file_name =
|
506
|
-
|
518
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
519
|
+
session,
|
507
520
|
query,
|
508
521
|
stage_transform_file_name,
|
509
522
|
stage_result_file_name,
|
510
523
|
identifier.get_unescaped_names(self.input_cols),
|
511
524
|
identifier.get_unescaped_names(self.label_cols),
|
512
525
|
identifier.get_unescaped_names(self.sample_weight_col),
|
513
|
-
statement_params
|
526
|
+
statement_params,
|
514
527
|
)
|
515
528
|
|
516
529
|
if "|" in sproc_export_file_name:
|
@@ -520,7 +533,7 @@ class NuSVC(BaseTransformer):
|
|
520
533
|
print("\n".join(fields[1:]))
|
521
534
|
|
522
535
|
session.file.get(
|
523
|
-
|
536
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
524
537
|
local_result_file_name,
|
525
538
|
statement_params=statement_params
|
526
539
|
)
|
@@ -566,7 +579,7 @@ class NuSVC(BaseTransformer):
|
|
566
579
|
|
567
580
|
# Register vectorized UDF for batch inference
|
568
581
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
569
|
-
safe_id=self.
|
582
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
570
583
|
|
571
584
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
572
585
|
# will try to pickle all of self which fails.
|
@@ -658,7 +671,7 @@ class NuSVC(BaseTransformer):
|
|
658
671
|
return transformed_pandas_df.to_dict("records")
|
659
672
|
|
660
673
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
661
|
-
safe_id=self.
|
674
|
+
safe_id=self._get_rand_id()
|
662
675
|
)
|
663
676
|
|
664
677
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -825,11 +838,18 @@ class NuSVC(BaseTransformer):
|
|
825
838
|
Transformed dataset.
|
826
839
|
"""
|
827
840
|
if isinstance(dataset, DataFrame):
|
841
|
+
expected_type_inferred = ""
|
842
|
+
# when it is classifier, infer the datatype from label columns
|
843
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
844
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
845
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
846
|
+
)
|
847
|
+
|
828
848
|
output_df = self._batch_inference(
|
829
849
|
dataset=dataset,
|
830
850
|
inference_method="predict",
|
831
851
|
expected_output_cols_list=self.output_cols,
|
832
|
-
expected_output_cols_type=
|
852
|
+
expected_output_cols_type=expected_type_inferred,
|
833
853
|
)
|
834
854
|
elif isinstance(dataset, pd.DataFrame):
|
835
855
|
output_df = self._sklearn_inference(
|
@@ -900,10 +920,10 @@ class NuSVC(BaseTransformer):
|
|
900
920
|
|
901
921
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
902
922
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
903
|
-
Returns
|
923
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
904
924
|
"""
|
905
925
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
906
|
-
return []
|
926
|
+
return [output_cols_prefix]
|
907
927
|
|
908
928
|
classes = self._sklearn_object.classes_
|
909
929
|
if isinstance(classes, numpy.ndarray):
|
@@ -1134,7 +1154,7 @@ class NuSVC(BaseTransformer):
|
|
1134
1154
|
cp.dump(self._sklearn_object, local_score_file)
|
1135
1155
|
|
1136
1156
|
# Create temp stage to run score.
|
1137
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1157
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1138
1158
|
session = dataset._session
|
1139
1159
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1140
1160
|
SqlResultValidator(
|
@@ -1148,8 +1168,9 @@ class NuSVC(BaseTransformer):
|
|
1148
1168
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1149
1169
|
).validate()
|
1150
1170
|
|
1151
|
-
|
1152
|
-
|
1171
|
+
# Use posixpath to construct stage paths
|
1172
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1173
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1153
1174
|
statement_params = telemetry.get_function_usage_statement_params(
|
1154
1175
|
project=_PROJECT,
|
1155
1176
|
subproject=_SUBPROJECT,
|
@@ -1175,6 +1196,7 @@ class NuSVC(BaseTransformer):
|
|
1175
1196
|
replace=True,
|
1176
1197
|
session=session,
|
1177
1198
|
statement_params=statement_params,
|
1199
|
+
anonymous=True
|
1178
1200
|
)
|
1179
1201
|
def score_wrapper_sproc(
|
1180
1202
|
session: Session,
|
@@ -1182,7 +1204,8 @@ class NuSVC(BaseTransformer):
|
|
1182
1204
|
stage_score_file_name: str,
|
1183
1205
|
input_cols: List[str],
|
1184
1206
|
label_cols: List[str],
|
1185
|
-
sample_weight_col: Optional[str]
|
1207
|
+
sample_weight_col: Optional[str],
|
1208
|
+
statement_params: Dict[str, str]
|
1186
1209
|
) -> float:
|
1187
1210
|
import cloudpickle as cp
|
1188
1211
|
import numpy as np
|
@@ -1232,14 +1255,14 @@ class NuSVC(BaseTransformer):
|
|
1232
1255
|
api_calls=[Session.call],
|
1233
1256
|
custom_tags=dict([("autogen", True)]),
|
1234
1257
|
)
|
1235
|
-
score =
|
1236
|
-
|
1258
|
+
score = score_wrapper_sproc(
|
1259
|
+
session,
|
1237
1260
|
query,
|
1238
1261
|
stage_score_file_name,
|
1239
1262
|
identifier.get_unescaped_names(self.input_cols),
|
1240
1263
|
identifier.get_unescaped_names(self.label_cols),
|
1241
1264
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1242
|
-
statement_params
|
1265
|
+
statement_params,
|
1243
1266
|
)
|
1244
1267
|
|
1245
1268
|
cleanup_temp_files([local_score_file_name])
|
@@ -1257,18 +1280,20 @@ class NuSVC(BaseTransformer):
|
|
1257
1280
|
if self._sklearn_object._estimator_type == 'classifier':
|
1258
1281
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1259
1282
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1260
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1283
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1284
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1261
1285
|
# For regressor, the type of predict is float64
|
1262
1286
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1263
1287
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1264
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1265
|
-
|
1288
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1289
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1266
1290
|
for prob_func in PROB_FUNCTIONS:
|
1267
1291
|
if hasattr(self, prob_func):
|
1268
1292
|
output_cols_prefix: str = f"{prob_func}_"
|
1269
1293
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1270
1294
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1271
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1295
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1296
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1272
1297
|
|
1273
1298
|
@property
|
1274
1299
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|