snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/linear_model/ridge_classifier.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -261,7 +263,6 @@ class RidgeClassifier(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -289,6 +290,15 @@ class RidgeClassifier(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -367,7 +377,7 @@ class RidgeClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -380,11 +390,12 @@ class RidgeClassifier(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -410,6 +421,7 @@ class RidgeClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -418,7 +430,8 @@ class RidgeClassifier(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -485,15 +498,15 @@ class RidgeClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -503,7 +516,7 @@ class RidgeClassifier(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -549,7 +562,7 @@ class RidgeClassifier(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -641,7 +654,7 @@ class RidgeClassifier(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -808,11 +821,18 @@ class RidgeClassifier(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -883,10 +903,10 @@ class RidgeClassifier(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1113,7 +1133,7 @@ class RidgeClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1127,8 +1147,9 @@ class RidgeClassifier(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1154,6 +1175,7 @@ class RidgeClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1161,7 +1183,8 @@ class RidgeClassifier(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1211,14 +1234,14 @@ class RidgeClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1236,18 +1259,20 @@ class RidgeClassifier(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                    ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -230,7 +232,6 @@ class RidgeClassifierCV(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -255,6 +256,15 @@ class RidgeClassifierCV(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -333,7 +343,7 @@ class RidgeClassifierCV(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -346,11 +356,12 @@ class RidgeClassifierCV(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
        local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -376,6 +387,7 @@ class RidgeClassifierCV(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -384,7 +396,8 @@ class RidgeClassifierCV(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -451,15 +464,15 @@ class RidgeClassifierCV(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -469,7 +482,7 @@ class RidgeClassifierCV(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -515,7 +528,7 @@ class RidgeClassifierCV(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -607,7 +620,7 @@ class RidgeClassifierCV(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -774,11 +787,18 @@ class RidgeClassifierCV(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -849,10 +869,10 @@ class RidgeClassifierCV(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1079,7 +1099,7 @@ class RidgeClassifierCV(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1093,8 +1113,9 @@ class RidgeClassifierCV(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1120,6 +1141,7 @@ class RidgeClassifierCV(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1127,7 +1149,8 @@ class RidgeClassifierCV(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1177,14 +1200,14 @@ class RidgeClassifierCV(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1202,18 +1225,20 @@ class RidgeClassifierCV(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
        elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                    ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
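The autogenerated modeling wrappers listed above all show the same +51 −26 change counts, and the two diffs reproduced here follow one recurring pattern: a fresh random id generated per fit/score call for stage and sproc names (replacing the stored `self.id`), and stage paths built with `posixpath` instead of `os.path`. The snippet below is a minimal, illustrative sketch of those two pieces using only the standard library; the file name in it is a hypothetical placeholder, not code from the package.

```python
# Minimal sketch of the 1.0.3 naming/path pattern (not the package's actual code).
import os
import posixpath
from uuid import uuid4


def _get_rand_id() -> str:
    # Random id string usable in sproc, table, and stage names.
    return str(uuid4()).replace("-", "_").upper()


local_transform_file_name = "/tmp/snowml_model_abc.pkl"  # hypothetical temp file

# A fresh id per call instead of one id stored on the estimator instance.
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id())

# posixpath.join always produces '/'-separated paths, which is what a stage path
# needs; os.path.join would produce '\\' separators on Windows clients.
stage_transform_file_name = posixpath.join(
    transform_stage_name, os.path.basename(local_transform_file_name)
)
print(stage_transform_file_name)  # e.g. SNOWML_TRANSFORM_<ID>/snowml_model_abc.pkl
```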