snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/svm/svr.py
CHANGED
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -240,7 +242,6 @@ class SVR(BaseTransformer):
|
|
240
242
|
sample_weight_col: Optional[str] = None,
|
241
243
|
) -> None:
|
242
244
|
super().__init__()
|
243
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
244
245
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
245
246
|
|
246
247
|
self._deps = list(deps)
|
@@ -270,6 +271,15 @@ class SVR(BaseTransformer):
|
|
270
271
|
self.set_drop_input_cols(drop_input_cols)
|
271
272
|
self.set_sample_weight_col(sample_weight_col)
|
272
273
|
|
274
|
+
def _get_rand_id(self) -> str:
|
275
|
+
"""
|
276
|
+
Generate random id to be used in sproc and stage names.
|
277
|
+
|
278
|
+
Returns:
|
279
|
+
Random id string usable in sproc, table, and stage names.
|
280
|
+
"""
|
281
|
+
return str(uuid4()).replace("-", "_").upper()
|
282
|
+
|
273
283
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
274
284
|
"""
|
275
285
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -348,7 +358,7 @@ class SVR(BaseTransformer):
|
|
348
358
|
cp.dump(self._sklearn_object, local_transform_file)
|
349
359
|
|
350
360
|
# Create temp stage to run fit.
|
351
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
361
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
352
362
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
353
363
|
SqlResultValidator(
|
354
364
|
session=session,
|
@@ -361,11 +371,12 @@ class SVR(BaseTransformer):
|
|
361
371
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
362
372
|
).validate()
|
363
373
|
|
364
|
-
|
374
|
+
# Use posixpath to construct stage paths
|
375
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
376
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
365
377
|
local_result_file_name = get_temp_file_path()
|
366
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
367
378
|
|
368
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
379
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
369
380
|
statement_params = telemetry.get_function_usage_statement_params(
|
370
381
|
project=_PROJECT,
|
371
382
|
subproject=_SUBPROJECT,
|
@@ -391,6 +402,7 @@ class SVR(BaseTransformer):
|
|
391
402
|
replace=True,
|
392
403
|
session=session,
|
393
404
|
statement_params=statement_params,
|
405
|
+
anonymous=True
|
394
406
|
)
|
395
407
|
def fit_wrapper_sproc(
|
396
408
|
session: Session,
|
@@ -399,7 +411,8 @@ class SVR(BaseTransformer):
|
|
399
411
|
stage_result_file_name: str,
|
400
412
|
input_cols: List[str],
|
401
413
|
label_cols: List[str],
|
402
|
-
sample_weight_col: Optional[str]
|
414
|
+
sample_weight_col: Optional[str],
|
415
|
+
statement_params: Dict[str, str]
|
403
416
|
) -> str:
|
404
417
|
import cloudpickle as cp
|
405
418
|
import numpy as np
|
@@ -466,15 +479,15 @@ class SVR(BaseTransformer):
|
|
466
479
|
api_calls=[Session.call],
|
467
480
|
custom_tags=dict([("autogen", True)]),
|
468
481
|
)
|
469
|
-
sproc_export_file_name =
|
470
|
-
|
482
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
483
|
+
session,
|
471
484
|
query,
|
472
485
|
stage_transform_file_name,
|
473
486
|
stage_result_file_name,
|
474
487
|
identifier.get_unescaped_names(self.input_cols),
|
475
488
|
identifier.get_unescaped_names(self.label_cols),
|
476
489
|
identifier.get_unescaped_names(self.sample_weight_col),
|
477
|
-
statement_params
|
490
|
+
statement_params,
|
478
491
|
)
|
479
492
|
|
480
493
|
if "|" in sproc_export_file_name:
|
@@ -484,7 +497,7 @@ class SVR(BaseTransformer):
|
|
484
497
|
print("\n".join(fields[1:]))
|
485
498
|
|
486
499
|
session.file.get(
|
487
|
-
|
500
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
488
501
|
local_result_file_name,
|
489
502
|
statement_params=statement_params
|
490
503
|
)
|
@@ -530,7 +543,7 @@ class SVR(BaseTransformer):
|
|
530
543
|
|
531
544
|
# Register vectorized UDF for batch inference
|
532
545
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
533
|
-
safe_id=self.
|
546
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
534
547
|
|
535
548
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
536
549
|
# will try to pickle all of self which fails.
|
@@ -622,7 +635,7 @@ class SVR(BaseTransformer):
|
|
622
635
|
return transformed_pandas_df.to_dict("records")
|
623
636
|
|
624
637
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
625
|
-
safe_id=self.
|
638
|
+
safe_id=self._get_rand_id()
|
626
639
|
)
|
627
640
|
|
628
641
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -789,11 +802,18 @@ class SVR(BaseTransformer):
|
|
789
802
|
Transformed dataset.
|
790
803
|
"""
|
791
804
|
if isinstance(dataset, DataFrame):
|
805
|
+
expected_type_inferred = "float"
|
806
|
+
# when it is classifier, infer the datatype from label columns
|
807
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
808
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
809
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
810
|
+
)
|
811
|
+
|
792
812
|
output_df = self._batch_inference(
|
793
813
|
dataset=dataset,
|
794
814
|
inference_method="predict",
|
795
815
|
expected_output_cols_list=self.output_cols,
|
796
|
-
expected_output_cols_type=
|
816
|
+
expected_output_cols_type=expected_type_inferred,
|
797
817
|
)
|
798
818
|
elif isinstance(dataset, pd.DataFrame):
|
799
819
|
output_df = self._sklearn_inference(
|
@@ -864,10 +884,10 @@ class SVR(BaseTransformer):
|
|
864
884
|
|
865
885
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
866
886
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
867
|
-
Returns
|
887
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
868
888
|
"""
|
869
889
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
870
|
-
return []
|
890
|
+
return [output_cols_prefix]
|
871
891
|
|
872
892
|
classes = self._sklearn_object.classes_
|
873
893
|
if isinstance(classes, numpy.ndarray):
|
@@ -1092,7 +1112,7 @@ class SVR(BaseTransformer):
|
|
1092
1112
|
cp.dump(self._sklearn_object, local_score_file)
|
1093
1113
|
|
1094
1114
|
# Create temp stage to run score.
|
1095
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1115
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1096
1116
|
session = dataset._session
|
1097
1117
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1098
1118
|
SqlResultValidator(
|
@@ -1106,8 +1126,9 @@ class SVR(BaseTransformer):
|
|
1106
1126
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1107
1127
|
).validate()
|
1108
1128
|
|
1109
|
-
|
1110
|
-
|
1129
|
+
# Use posixpath to construct stage paths
|
1130
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1131
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1111
1132
|
statement_params = telemetry.get_function_usage_statement_params(
|
1112
1133
|
project=_PROJECT,
|
1113
1134
|
subproject=_SUBPROJECT,
|
@@ -1133,6 +1154,7 @@ class SVR(BaseTransformer):
|
|
1133
1154
|
replace=True,
|
1134
1155
|
session=session,
|
1135
1156
|
statement_params=statement_params,
|
1157
|
+
anonymous=True
|
1136
1158
|
)
|
1137
1159
|
def score_wrapper_sproc(
|
1138
1160
|
session: Session,
|
@@ -1140,7 +1162,8 @@ class SVR(BaseTransformer):
|
|
1140
1162
|
stage_score_file_name: str,
|
1141
1163
|
input_cols: List[str],
|
1142
1164
|
label_cols: List[str],
|
1143
|
-
sample_weight_col: Optional[str]
|
1165
|
+
sample_weight_col: Optional[str],
|
1166
|
+
statement_params: Dict[str, str]
|
1144
1167
|
) -> float:
|
1145
1168
|
import cloudpickle as cp
|
1146
1169
|
import numpy as np
|
@@ -1190,14 +1213,14 @@ class SVR(BaseTransformer):
|
|
1190
1213
|
api_calls=[Session.call],
|
1191
1214
|
custom_tags=dict([("autogen", True)]),
|
1192
1215
|
)
|
1193
|
-
score =
|
1194
|
-
|
1216
|
+
score = score_wrapper_sproc(
|
1217
|
+
session,
|
1195
1218
|
query,
|
1196
1219
|
stage_score_file_name,
|
1197
1220
|
identifier.get_unescaped_names(self.input_cols),
|
1198
1221
|
identifier.get_unescaped_names(self.label_cols),
|
1199
1222
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1200
|
-
statement_params
|
1223
|
+
statement_params,
|
1201
1224
|
)
|
1202
1225
|
|
1203
1226
|
cleanup_temp_files([local_score_file_name])
|
@@ -1215,18 +1238,20 @@ class SVR(BaseTransformer):
|
|
1215
1238
|
if self._sklearn_object._estimator_type == 'classifier':
|
1216
1239
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1217
1240
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1218
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1241
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1242
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1219
1243
|
# For regressor, the type of predict is float64
|
1220
1244
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1221
1245
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1222
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1223
|
-
|
1246
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1247
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1224
1248
|
for prob_func in PROB_FUNCTIONS:
|
1225
1249
|
if hasattr(self, prob_func):
|
1226
1250
|
output_cols_prefix: str = f"{prob_func}_"
|
1227
1251
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1228
1252
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1229
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1253
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1254
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1230
1255
|
|
1231
1256
|
@property
|
1232
1257
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
snowflake/ml/modeling/tree/decision_tree_classifier.py
CHANGED
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -307,7 +309,6 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
307
309
|
sample_weight_col: Optional[str] = None,
|
308
310
|
) -> None:
|
309
311
|
super().__init__()
|
310
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
311
312
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
312
313
|
|
313
314
|
self._deps = list(deps)
|
@@ -338,6 +339,15 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
338
339
|
self.set_drop_input_cols(drop_input_cols)
|
339
340
|
self.set_sample_weight_col(sample_weight_col)
|
340
341
|
|
342
|
+
def _get_rand_id(self) -> str:
|
343
|
+
"""
|
344
|
+
Generate random id to be used in sproc and stage names.
|
345
|
+
|
346
|
+
Returns:
|
347
|
+
Random id string usable in sproc, table, and stage names.
|
348
|
+
"""
|
349
|
+
return str(uuid4()).replace("-", "_").upper()
|
350
|
+
|
341
351
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
342
352
|
"""
|
343
353
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -416,7 +426,7 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
416
426
|
cp.dump(self._sklearn_object, local_transform_file)
|
417
427
|
|
418
428
|
# Create temp stage to run fit.
|
419
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
429
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
420
430
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
421
431
|
SqlResultValidator(
|
422
432
|
session=session,
|
@@ -429,11 +439,12 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
429
439
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
430
440
|
).validate()
|
431
441
|
|
432
|
-
|
442
|
+
# Use posixpath to construct stage paths
|
443
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
444
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
433
445
|
local_result_file_name = get_temp_file_path()
|
434
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
435
446
|
|
436
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
447
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
437
448
|
statement_params = telemetry.get_function_usage_statement_params(
|
438
449
|
project=_PROJECT,
|
439
450
|
subproject=_SUBPROJECT,
|
@@ -459,6 +470,7 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
459
470
|
replace=True,
|
460
471
|
session=session,
|
461
472
|
statement_params=statement_params,
|
473
|
+
anonymous=True
|
462
474
|
)
|
463
475
|
def fit_wrapper_sproc(
|
464
476
|
session: Session,
|
@@ -467,7 +479,8 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
467
479
|
stage_result_file_name: str,
|
468
480
|
input_cols: List[str],
|
469
481
|
label_cols: List[str],
|
470
|
-
sample_weight_col: Optional[str]
|
482
|
+
sample_weight_col: Optional[str],
|
483
|
+
statement_params: Dict[str, str]
|
471
484
|
) -> str:
|
472
485
|
import cloudpickle as cp
|
473
486
|
import numpy as np
|
@@ -534,15 +547,15 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
534
547
|
api_calls=[Session.call],
|
535
548
|
custom_tags=dict([("autogen", True)]),
|
536
549
|
)
|
537
|
-
sproc_export_file_name =
|
538
|
-
|
550
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
551
|
+
session,
|
539
552
|
query,
|
540
553
|
stage_transform_file_name,
|
541
554
|
stage_result_file_name,
|
542
555
|
identifier.get_unescaped_names(self.input_cols),
|
543
556
|
identifier.get_unescaped_names(self.label_cols),
|
544
557
|
identifier.get_unescaped_names(self.sample_weight_col),
|
545
|
-
statement_params
|
558
|
+
statement_params,
|
546
559
|
)
|
547
560
|
|
548
561
|
if "|" in sproc_export_file_name:
|
@@ -552,7 +565,7 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
552
565
|
print("\n".join(fields[1:]))
|
553
566
|
|
554
567
|
session.file.get(
|
555
|
-
|
568
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
556
569
|
local_result_file_name,
|
557
570
|
statement_params=statement_params
|
558
571
|
)
|
@@ -598,7 +611,7 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
598
611
|
|
599
612
|
# Register vectorized UDF for batch inference
|
600
613
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
601
|
-
safe_id=self.
|
614
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
602
615
|
|
603
616
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
604
617
|
# will try to pickle all of self which fails.
|
@@ -690,7 +703,7 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
690
703
|
return transformed_pandas_df.to_dict("records")
|
691
704
|
|
692
705
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
693
|
-
safe_id=self.
|
706
|
+
safe_id=self._get_rand_id()
|
694
707
|
)
|
695
708
|
|
696
709
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -857,11 +870,18 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
857
870
|
Transformed dataset.
|
858
871
|
"""
|
859
872
|
if isinstance(dataset, DataFrame):
|
873
|
+
expected_type_inferred = ""
|
874
|
+
# when it is classifier, infer the datatype from label columns
|
875
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
876
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
877
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
878
|
+
)
|
879
|
+
|
860
880
|
output_df = self._batch_inference(
|
861
881
|
dataset=dataset,
|
862
882
|
inference_method="predict",
|
863
883
|
expected_output_cols_list=self.output_cols,
|
864
|
-
expected_output_cols_type=
|
884
|
+
expected_output_cols_type=expected_type_inferred,
|
865
885
|
)
|
866
886
|
elif isinstance(dataset, pd.DataFrame):
|
867
887
|
output_df = self._sklearn_inference(
|
@@ -932,10 +952,10 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
932
952
|
|
933
953
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
934
954
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
935
|
-
Returns
|
955
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
936
956
|
"""
|
937
957
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
938
|
-
return []
|
958
|
+
return [output_cols_prefix]
|
939
959
|
|
940
960
|
classes = self._sklearn_object.classes_
|
941
961
|
if isinstance(classes, numpy.ndarray):
|
@@ -1164,7 +1184,7 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
1164
1184
|
cp.dump(self._sklearn_object, local_score_file)
|
1165
1185
|
|
1166
1186
|
# Create temp stage to run score.
|
1167
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1187
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1168
1188
|
session = dataset._session
|
1169
1189
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1170
1190
|
SqlResultValidator(
|
@@ -1178,8 +1198,9 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
1178
1198
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1179
1199
|
).validate()
|
1180
1200
|
|
1181
|
-
|
1182
|
-
|
1201
|
+
# Use posixpath to construct stage paths
|
1202
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1203
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1183
1204
|
statement_params = telemetry.get_function_usage_statement_params(
|
1184
1205
|
project=_PROJECT,
|
1185
1206
|
subproject=_SUBPROJECT,
|
@@ -1205,6 +1226,7 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
1205
1226
|
replace=True,
|
1206
1227
|
session=session,
|
1207
1228
|
statement_params=statement_params,
|
1229
|
+
anonymous=True
|
1208
1230
|
)
|
1209
1231
|
def score_wrapper_sproc(
|
1210
1232
|
session: Session,
|
@@ -1212,7 +1234,8 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
1212
1234
|
stage_score_file_name: str,
|
1213
1235
|
input_cols: List[str],
|
1214
1236
|
label_cols: List[str],
|
1215
|
-
sample_weight_col: Optional[str]
|
1237
|
+
sample_weight_col: Optional[str],
|
1238
|
+
statement_params: Dict[str, str]
|
1216
1239
|
) -> float:
|
1217
1240
|
import cloudpickle as cp
|
1218
1241
|
import numpy as np
|
@@ -1262,14 +1285,14 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
1262
1285
|
api_calls=[Session.call],
|
1263
1286
|
custom_tags=dict([("autogen", True)]),
|
1264
1287
|
)
|
1265
|
-
score =
|
1266
|
-
|
1288
|
+
score = score_wrapper_sproc(
|
1289
|
+
session,
|
1267
1290
|
query,
|
1268
1291
|
stage_score_file_name,
|
1269
1292
|
identifier.get_unescaped_names(self.input_cols),
|
1270
1293
|
identifier.get_unescaped_names(self.label_cols),
|
1271
1294
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1272
|
-
statement_params
|
1295
|
+
statement_params,
|
1273
1296
|
)
|
1274
1297
|
|
1275
1298
|
cleanup_temp_files([local_score_file_name])
|
@@ -1287,18 +1310,20 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
1287
1310
|
if self._sklearn_object._estimator_type == 'classifier':
|
1288
1311
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1289
1312
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1290
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1313
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1314
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1291
1315
|
# For regressor, the type of predict is float64
|
1292
1316
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1293
1317
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1294
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1295
|
-
|
1318
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1319
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1296
1320
|
for prob_func in PROB_FUNCTIONS:
|
1297
1321
|
if hasattr(self, prob_func):
|
1298
1322
|
output_cols_prefix: str = f"{prob_func}_"
|
1299
1323
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1300
1324
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1301
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1325
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1326
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1302
1327
|
|
1303
1328
|
@property
|
1304
1329
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|