snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -196,7 +198,6 @@ class OneVsOneClassifier(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | _gather_dependencies(estimator)
         self._deps = list(deps)
@@ -217,6 +218,15 @@ class OneVsOneClassifier(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -295,7 +305,7 @@ class OneVsOneClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -308,11 +318,12 @@ class OneVsOneClassifier(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -338,6 +349,7 @@ class OneVsOneClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -346,7 +358,8 @@ class OneVsOneClassifier(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -413,15 +426,15 @@ class OneVsOneClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -431,7 +444,7 @@ class OneVsOneClassifier(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -477,7 +490,7 @@ class OneVsOneClassifier(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -569,7 +582,7 @@ class OneVsOneClassifier(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -736,11 +749,18 @@ class OneVsOneClassifier(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -811,10 +831,10 @@ class OneVsOneClassifier(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1041,7 +1061,7 @@ class OneVsOneClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1055,8 +1075,9 @@ class OneVsOneClassifier(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1082,6 +1103,7 @@ class OneVsOneClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1089,7 +1111,8 @@ class OneVsOneClassifier(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1139,14 +1162,14 @@ class OneVsOneClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1164,18 +1187,20 @@ class OneVsOneClassifier(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                        ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -204,7 +206,6 @@ class OneVsRestClassifier(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | _gather_dependencies(estimator)
         self._deps = list(deps)
@@ -226,6 +227,15 @@ class OneVsRestClassifier(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -304,7 +314,7 @@ class OneVsRestClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -317,11 +327,12 @@ class OneVsRestClassifier(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -347,6 +358,7 @@ class OneVsRestClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -355,7 +367,8 @@ class OneVsRestClassifier(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -422,15 +435,15 @@ class OneVsRestClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -440,7 +453,7 @@ class OneVsRestClassifier(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -486,7 +499,7 @@ class OneVsRestClassifier(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -578,7 +591,7 @@ class OneVsRestClassifier(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -745,11 +758,18 @@ class OneVsRestClassifier(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -820,10 +840,10 @@ class OneVsRestClassifier(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
        """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1054,7 +1074,7 @@ class OneVsRestClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1068,8 +1088,9 @@ class OneVsRestClassifier(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1095,6 +1116,7 @@ class OneVsRestClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1102,7 +1124,8 @@ class OneVsRestClassifier(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1152,14 +1175,14 @@ class OneVsRestClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1177,18 +1200,20 @@ class OneVsRestClassifier(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                        ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: