snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -196,7 +198,6 @@ class LedoitWolf(BaseTransformer):
|
|
196
198
|
sample_weight_col: Optional[str] = None,
|
197
199
|
) -> None:
|
198
200
|
super().__init__()
|
199
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
200
201
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
201
202
|
|
202
203
|
self._deps = list(deps)
|
@@ -218,6 +219,15 @@ class LedoitWolf(BaseTransformer):
|
|
218
219
|
self.set_drop_input_cols(drop_input_cols)
|
219
220
|
self.set_sample_weight_col(sample_weight_col)
|
220
221
|
|
222
|
+
def _get_rand_id(self) -> str:
|
223
|
+
"""
|
224
|
+
Generate random id to be used in sproc and stage names.
|
225
|
+
|
226
|
+
Returns:
|
227
|
+
Random id string usable in sproc, table, and stage names.
|
228
|
+
"""
|
229
|
+
return str(uuid4()).replace("-", "_").upper()
|
230
|
+
|
221
231
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
222
232
|
"""
|
223
233
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -296,7 +306,7 @@ class LedoitWolf(BaseTransformer):
|
|
296
306
|
cp.dump(self._sklearn_object, local_transform_file)
|
297
307
|
|
298
308
|
# Create temp stage to run fit.
|
299
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
309
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
300
310
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
301
311
|
SqlResultValidator(
|
302
312
|
session=session,
|
@@ -309,11 +319,12 @@ class LedoitWolf(BaseTransformer):
|
|
309
319
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
310
320
|
).validate()
|
311
321
|
|
312
|
-
|
322
|
+
# Use posixpath to construct stage paths
|
323
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
324
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
313
325
|
local_result_file_name = get_temp_file_path()
|
314
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
315
326
|
|
316
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
327
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
317
328
|
statement_params = telemetry.get_function_usage_statement_params(
|
318
329
|
project=_PROJECT,
|
319
330
|
subproject=_SUBPROJECT,
|
@@ -339,6 +350,7 @@ class LedoitWolf(BaseTransformer):
|
|
339
350
|
replace=True,
|
340
351
|
session=session,
|
341
352
|
statement_params=statement_params,
|
353
|
+
anonymous=True
|
342
354
|
)
|
343
355
|
def fit_wrapper_sproc(
|
344
356
|
session: Session,
|
@@ -347,7 +359,8 @@ class LedoitWolf(BaseTransformer):
|
|
347
359
|
stage_result_file_name: str,
|
348
360
|
input_cols: List[str],
|
349
361
|
label_cols: List[str],
|
350
|
-
sample_weight_col: Optional[str]
|
362
|
+
sample_weight_col: Optional[str],
|
363
|
+
statement_params: Dict[str, str]
|
351
364
|
) -> str:
|
352
365
|
import cloudpickle as cp
|
353
366
|
import numpy as np
|
@@ -414,15 +427,15 @@ class LedoitWolf(BaseTransformer):
|
|
414
427
|
api_calls=[Session.call],
|
415
428
|
custom_tags=dict([("autogen", True)]),
|
416
429
|
)
|
417
|
-
sproc_export_file_name =
|
418
|
-
|
430
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
431
|
+
session,
|
419
432
|
query,
|
420
433
|
stage_transform_file_name,
|
421
434
|
stage_result_file_name,
|
422
435
|
identifier.get_unescaped_names(self.input_cols),
|
423
436
|
identifier.get_unescaped_names(self.label_cols),
|
424
437
|
identifier.get_unescaped_names(self.sample_weight_col),
|
425
|
-
statement_params
|
438
|
+
statement_params,
|
426
439
|
)
|
427
440
|
|
428
441
|
if "|" in sproc_export_file_name:
|
@@ -432,7 +445,7 @@ class LedoitWolf(BaseTransformer):
|
|
432
445
|
print("\n".join(fields[1:]))
|
433
446
|
|
434
447
|
session.file.get(
|
435
|
-
|
448
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
436
449
|
local_result_file_name,
|
437
450
|
statement_params=statement_params
|
438
451
|
)
|
@@ -478,7 +491,7 @@ class LedoitWolf(BaseTransformer):
|
|
478
491
|
|
479
492
|
# Register vectorized UDF for batch inference
|
480
493
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
481
|
-
safe_id=self.id, method=inference_method)
|
494
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
482
495
|
|
483
496
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
484
497
|
# will try to pickle all of self which fails.
|
@@ -570,7 +583,7 @@ class LedoitWolf(BaseTransformer):
|
|
570
583
|
return transformed_pandas_df.to_dict("records")
|
571
584
|
|
572
585
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
573
|
-
safe_id=self.id
|
586
|
+
safe_id=self._get_rand_id()
|
574
587
|
)
|
575
588
|
|
576
589
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -735,11 +748,18 @@ class LedoitWolf(BaseTransformer):
|
|
735
748
|
Transformed dataset.
|
736
749
|
"""
|
737
750
|
if isinstance(dataset, DataFrame):
|
751
|
+
expected_type_inferred = ""
|
752
|
+
# when it is classifier, infer the datatype from label columns
|
753
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
754
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
755
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
756
|
+
)
|
757
|
+
|
738
758
|
output_df = self._batch_inference(
|
739
759
|
dataset=dataset,
|
740
760
|
inference_method="predict",
|
741
761
|
expected_output_cols_list=self.output_cols,
|
742
|
-
expected_output_cols_type=
|
762
|
+
expected_output_cols_type=expected_type_inferred,
|
743
763
|
)
|
744
764
|
elif isinstance(dataset, pd.DataFrame):
|
745
765
|
output_df = self._sklearn_inference(
|
@@ -810,10 +830,10 @@ class LedoitWolf(BaseTransformer):
|
|
810
830
|
|
811
831
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
812
832
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
813
|
-
Returns
|
833
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
814
834
|
"""
|
815
835
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
816
|
-
return []
|
836
|
+
return [output_cols_prefix]
|
817
837
|
|
818
838
|
classes = self._sklearn_object.classes_
|
819
839
|
if isinstance(classes, numpy.ndarray):
|
@@ -1038,7 +1058,7 @@ class LedoitWolf(BaseTransformer):
|
|
1038
1058
|
cp.dump(self._sklearn_object, local_score_file)
|
1039
1059
|
|
1040
1060
|
# Create temp stage to run score.
|
1041
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1061
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1042
1062
|
session = dataset._session
|
1043
1063
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1044
1064
|
SqlResultValidator(
|
@@ -1052,8 +1072,9 @@ class LedoitWolf(BaseTransformer):
|
|
1052
1072
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1053
1073
|
).validate()
|
1054
1074
|
|
1055
|
-
|
1056
|
-
|
1075
|
+
# Use posixpath to construct stage paths
|
1076
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1077
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1057
1078
|
statement_params = telemetry.get_function_usage_statement_params(
|
1058
1079
|
project=_PROJECT,
|
1059
1080
|
subproject=_SUBPROJECT,
|
@@ -1079,6 +1100,7 @@ class LedoitWolf(BaseTransformer):
|
|
1079
1100
|
replace=True,
|
1080
1101
|
session=session,
|
1081
1102
|
statement_params=statement_params,
|
1103
|
+
anonymous=True
|
1082
1104
|
)
|
1083
1105
|
def score_wrapper_sproc(
|
1084
1106
|
session: Session,
|
@@ -1086,7 +1108,8 @@ class LedoitWolf(BaseTransformer):
|
|
1086
1108
|
stage_score_file_name: str,
|
1087
1109
|
input_cols: List[str],
|
1088
1110
|
label_cols: List[str],
|
1089
|
-
sample_weight_col: Optional[str]
|
1111
|
+
sample_weight_col: Optional[str],
|
1112
|
+
statement_params: Dict[str, str]
|
1090
1113
|
) -> float:
|
1091
1114
|
import cloudpickle as cp
|
1092
1115
|
import numpy as np
|
@@ -1136,14 +1159,14 @@ class LedoitWolf(BaseTransformer):
|
|
1136
1159
|
api_calls=[Session.call],
|
1137
1160
|
custom_tags=dict([("autogen", True)]),
|
1138
1161
|
)
|
1139
|
-
score =
|
1140
|
-
|
1162
|
+
score = score_wrapper_sproc(
|
1163
|
+
session,
|
1141
1164
|
query,
|
1142
1165
|
stage_score_file_name,
|
1143
1166
|
identifier.get_unescaped_names(self.input_cols),
|
1144
1167
|
identifier.get_unescaped_names(self.label_cols),
|
1145
1168
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1146
|
-
statement_params
|
1169
|
+
statement_params,
|
1147
1170
|
)
|
1148
1171
|
|
1149
1172
|
cleanup_temp_files([local_score_file_name])
|
@@ -1161,18 +1184,20 @@ class LedoitWolf(BaseTransformer):
|
|
1161
1184
|
if self._sklearn_object._estimator_type == 'classifier':
|
1162
1185
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1163
1186
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1164
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1187
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1188
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1165
1189
|
# For regressor, the type of predict is float64
|
1166
1190
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1167
1191
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1168
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1169
|
-
|
1192
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1193
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1170
1194
|
for prob_func in PROB_FUNCTIONS:
|
1171
1195
|
if hasattr(self, prob_func):
|
1172
1196
|
output_cols_prefix: str = f"{prob_func}_"
|
1173
1197
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1174
1198
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1175
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1199
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1200
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1176
1201
|
|
1177
1202
|
@property
|
1178
1203
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -207,7 +209,6 @@ class MinCovDet(BaseTransformer):
|
|
207
209
|
sample_weight_col: Optional[str] = None,
|
208
210
|
) -> None:
|
209
211
|
super().__init__()
|
210
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
211
212
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
212
213
|
|
213
214
|
self._deps = list(deps)
|
@@ -230,6 +231,15 @@ class MinCovDet(BaseTransformer):
|
|
230
231
|
self.set_drop_input_cols(drop_input_cols)
|
231
232
|
self.set_sample_weight_col(sample_weight_col)
|
232
233
|
|
234
|
+
def _get_rand_id(self) -> str:
|
235
|
+
"""
|
236
|
+
Generate random id to be used in sproc and stage names.
|
237
|
+
|
238
|
+
Returns:
|
239
|
+
Random id string usable in sproc, table, and stage names.
|
240
|
+
"""
|
241
|
+
return str(uuid4()).replace("-", "_").upper()
|
242
|
+
|
233
243
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
234
244
|
"""
|
235
245
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -308,7 +318,7 @@ class MinCovDet(BaseTransformer):
|
|
308
318
|
cp.dump(self._sklearn_object, local_transform_file)
|
309
319
|
|
310
320
|
# Create temp stage to run fit.
|
311
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
321
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
312
322
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
313
323
|
SqlResultValidator(
|
314
324
|
session=session,
|
@@ -321,11 +331,12 @@ class MinCovDet(BaseTransformer):
|
|
321
331
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
322
332
|
).validate()
|
323
333
|
|
324
|
-
|
334
|
+
# Use posixpath to construct stage paths
|
335
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
336
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
325
337
|
local_result_file_name = get_temp_file_path()
|
326
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
327
338
|
|
328
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
339
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
329
340
|
statement_params = telemetry.get_function_usage_statement_params(
|
330
341
|
project=_PROJECT,
|
331
342
|
subproject=_SUBPROJECT,
|
@@ -351,6 +362,7 @@ class MinCovDet(BaseTransformer):
|
|
351
362
|
replace=True,
|
352
363
|
session=session,
|
353
364
|
statement_params=statement_params,
|
365
|
+
anonymous=True
|
354
366
|
)
|
355
367
|
def fit_wrapper_sproc(
|
356
368
|
session: Session,
|
@@ -359,7 +371,8 @@ class MinCovDet(BaseTransformer):
|
|
359
371
|
stage_result_file_name: str,
|
360
372
|
input_cols: List[str],
|
361
373
|
label_cols: List[str],
|
362
|
-
sample_weight_col: Optional[str]
|
374
|
+
sample_weight_col: Optional[str],
|
375
|
+
statement_params: Dict[str, str]
|
363
376
|
) -> str:
|
364
377
|
import cloudpickle as cp
|
365
378
|
import numpy as np
|
@@ -426,15 +439,15 @@ class MinCovDet(BaseTransformer):
|
|
426
439
|
api_calls=[Session.call],
|
427
440
|
custom_tags=dict([("autogen", True)]),
|
428
441
|
)
|
429
|
-
sproc_export_file_name =
|
430
|
-
|
442
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
443
|
+
session,
|
431
444
|
query,
|
432
445
|
stage_transform_file_name,
|
433
446
|
stage_result_file_name,
|
434
447
|
identifier.get_unescaped_names(self.input_cols),
|
435
448
|
identifier.get_unescaped_names(self.label_cols),
|
436
449
|
identifier.get_unescaped_names(self.sample_weight_col),
|
437
|
-
statement_params
|
450
|
+
statement_params,
|
438
451
|
)
|
439
452
|
|
440
453
|
if "|" in sproc_export_file_name:
|
@@ -444,7 +457,7 @@ class MinCovDet(BaseTransformer):
|
|
444
457
|
print("\n".join(fields[1:]))
|
445
458
|
|
446
459
|
session.file.get(
|
447
|
-
|
460
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
448
461
|
local_result_file_name,
|
449
462
|
statement_params=statement_params
|
450
463
|
)
|
@@ -490,7 +503,7 @@ class MinCovDet(BaseTransformer):
|
|
490
503
|
|
491
504
|
# Register vectorized UDF for batch inference
|
492
505
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
493
|
-
safe_id=self.id, method=inference_method)
|
506
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
494
507
|
|
495
508
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
496
509
|
# will try to pickle all of self which fails.
|
@@ -582,7 +595,7 @@ class MinCovDet(BaseTransformer):
|
|
582
595
|
return transformed_pandas_df.to_dict("records")
|
583
596
|
|
584
597
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
585
|
-
safe_id=self.id
|
598
|
+
safe_id=self._get_rand_id()
|
586
599
|
)
|
587
600
|
|
588
601
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -747,11 +760,18 @@ class MinCovDet(BaseTransformer):
|
|
747
760
|
Transformed dataset.
|
748
761
|
"""
|
749
762
|
if isinstance(dataset, DataFrame):
|
763
|
+
expected_type_inferred = ""
|
764
|
+
# when it is classifier, infer the datatype from label columns
|
765
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
766
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
767
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
768
|
+
)
|
769
|
+
|
750
770
|
output_df = self._batch_inference(
|
751
771
|
dataset=dataset,
|
752
772
|
inference_method="predict",
|
753
773
|
expected_output_cols_list=self.output_cols,
|
754
|
-
expected_output_cols_type=
|
774
|
+
expected_output_cols_type=expected_type_inferred,
|
755
775
|
)
|
756
776
|
elif isinstance(dataset, pd.DataFrame):
|
757
777
|
output_df = self._sklearn_inference(
|
@@ -822,10 +842,10 @@ class MinCovDet(BaseTransformer):
|
|
822
842
|
|
823
843
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
824
844
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
825
|
-
Returns
|
845
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
826
846
|
"""
|
827
847
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
828
|
-
return []
|
848
|
+
return [output_cols_prefix]
|
829
849
|
|
830
850
|
classes = self._sklearn_object.classes_
|
831
851
|
if isinstance(classes, numpy.ndarray):
|
@@ -1050,7 +1070,7 @@ class MinCovDet(BaseTransformer):
|
|
1050
1070
|
cp.dump(self._sklearn_object, local_score_file)
|
1051
1071
|
|
1052
1072
|
# Create temp stage to run score.
|
1053
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1073
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1054
1074
|
session = dataset._session
|
1055
1075
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1056
1076
|
SqlResultValidator(
|
@@ -1064,8 +1084,9 @@ class MinCovDet(BaseTransformer):
|
|
1064
1084
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1065
1085
|
).validate()
|
1066
1086
|
|
1067
|
-
|
1068
|
-
|
1087
|
+
# Use posixpath to construct stage paths
|
1088
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1089
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1069
1090
|
statement_params = telemetry.get_function_usage_statement_params(
|
1070
1091
|
project=_PROJECT,
|
1071
1092
|
subproject=_SUBPROJECT,
|
@@ -1091,6 +1112,7 @@ class MinCovDet(BaseTransformer):
|
|
1091
1112
|
replace=True,
|
1092
1113
|
session=session,
|
1093
1114
|
statement_params=statement_params,
|
1115
|
+
anonymous=True
|
1094
1116
|
)
|
1095
1117
|
def score_wrapper_sproc(
|
1096
1118
|
session: Session,
|
@@ -1098,7 +1120,8 @@ class MinCovDet(BaseTransformer):
|
|
1098
1120
|
stage_score_file_name: str,
|
1099
1121
|
input_cols: List[str],
|
1100
1122
|
label_cols: List[str],
|
1101
|
-
sample_weight_col: Optional[str]
|
1123
|
+
sample_weight_col: Optional[str],
|
1124
|
+
statement_params: Dict[str, str]
|
1102
1125
|
) -> float:
|
1103
1126
|
import cloudpickle as cp
|
1104
1127
|
import numpy as np
|
@@ -1148,14 +1171,14 @@ class MinCovDet(BaseTransformer):
|
|
1148
1171
|
api_calls=[Session.call],
|
1149
1172
|
custom_tags=dict([("autogen", True)]),
|
1150
1173
|
)
|
1151
|
-
score =
|
1152
|
-
|
1174
|
+
score = score_wrapper_sproc(
|
1175
|
+
session,
|
1153
1176
|
query,
|
1154
1177
|
stage_score_file_name,
|
1155
1178
|
identifier.get_unescaped_names(self.input_cols),
|
1156
1179
|
identifier.get_unescaped_names(self.label_cols),
|
1157
1180
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1158
|
-
statement_params
|
1181
|
+
statement_params,
|
1159
1182
|
)
|
1160
1183
|
|
1161
1184
|
cleanup_temp_files([local_score_file_name])
|
@@ -1173,18 +1196,20 @@ class MinCovDet(BaseTransformer):
|
|
1173
1196
|
if self._sklearn_object._estimator_type == 'classifier':
|
1174
1197
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1175
1198
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1176
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1199
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1200
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1177
1201
|
# For regressor, the type of predict is float64
|
1178
1202
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1179
1203
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1180
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1181
|
-
|
1204
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1205
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1182
1206
|
for prob_func in PROB_FUNCTIONS:
|
1183
1207
|
if hasattr(self, prob_func):
|
1184
1208
|
output_cols_prefix: str = f"{prob_func}_"
|
1185
1209
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1186
1210
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1187
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1211
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1212
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1188
1213
|
|
1189
1214
|
@property
|
1190
1215
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|