snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -211,7 +213,6 @@ class EllipticEnvelope(BaseTransformer):
|
|
211
213
|
sample_weight_col: Optional[str] = None,
|
212
214
|
) -> None:
|
213
215
|
super().__init__()
|
214
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
215
216
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
216
217
|
|
217
218
|
self._deps = list(deps)
|
@@ -235,6 +236,15 @@ class EllipticEnvelope(BaseTransformer):
|
|
235
236
|
self.set_drop_input_cols(drop_input_cols)
|
236
237
|
self.set_sample_weight_col(sample_weight_col)
|
237
238
|
|
239
|
+
def _get_rand_id(self) -> str:
|
240
|
+
"""
|
241
|
+
Generate random id to be used in sproc and stage names.
|
242
|
+
|
243
|
+
Returns:
|
244
|
+
Random id string usable in sproc, table, and stage names.
|
245
|
+
"""
|
246
|
+
return str(uuid4()).replace("-", "_").upper()
|
247
|
+
|
238
248
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
239
249
|
"""
|
240
250
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -313,7 +323,7 @@ class EllipticEnvelope(BaseTransformer):
|
|
313
323
|
cp.dump(self._sklearn_object, local_transform_file)
|
314
324
|
|
315
325
|
# Create temp stage to run fit.
|
316
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
326
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
317
327
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
318
328
|
SqlResultValidator(
|
319
329
|
session=session,
|
@@ -326,11 +336,12 @@ class EllipticEnvelope(BaseTransformer):
|
|
326
336
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
327
337
|
).validate()
|
328
338
|
|
329
|
-
|
339
|
+
# Use posixpath to construct stage paths
|
340
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
341
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
330
342
|
local_result_file_name = get_temp_file_path()
|
331
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
332
343
|
|
333
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
344
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
334
345
|
statement_params = telemetry.get_function_usage_statement_params(
|
335
346
|
project=_PROJECT,
|
336
347
|
subproject=_SUBPROJECT,
|
@@ -356,6 +367,7 @@ class EllipticEnvelope(BaseTransformer):
|
|
356
367
|
replace=True,
|
357
368
|
session=session,
|
358
369
|
statement_params=statement_params,
|
370
|
+
anonymous=True
|
359
371
|
)
|
360
372
|
def fit_wrapper_sproc(
|
361
373
|
session: Session,
|
@@ -364,7 +376,8 @@ class EllipticEnvelope(BaseTransformer):
|
|
364
376
|
stage_result_file_name: str,
|
365
377
|
input_cols: List[str],
|
366
378
|
label_cols: List[str],
|
367
|
-
sample_weight_col: Optional[str]
|
379
|
+
sample_weight_col: Optional[str],
|
380
|
+
statement_params: Dict[str, str]
|
368
381
|
) -> str:
|
369
382
|
import cloudpickle as cp
|
370
383
|
import numpy as np
|
@@ -431,15 +444,15 @@ class EllipticEnvelope(BaseTransformer):
|
|
431
444
|
api_calls=[Session.call],
|
432
445
|
custom_tags=dict([("autogen", True)]),
|
433
446
|
)
|
434
|
-
sproc_export_file_name =
|
435
|
-
|
447
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
448
|
+
session,
|
436
449
|
query,
|
437
450
|
stage_transform_file_name,
|
438
451
|
stage_result_file_name,
|
439
452
|
identifier.get_unescaped_names(self.input_cols),
|
440
453
|
identifier.get_unescaped_names(self.label_cols),
|
441
454
|
identifier.get_unescaped_names(self.sample_weight_col),
|
442
|
-
statement_params
|
455
|
+
statement_params,
|
443
456
|
)
|
444
457
|
|
445
458
|
if "|" in sproc_export_file_name:
|
@@ -449,7 +462,7 @@ class EllipticEnvelope(BaseTransformer):
|
|
449
462
|
print("\n".join(fields[1:]))
|
450
463
|
|
451
464
|
session.file.get(
|
452
|
-
|
465
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
453
466
|
local_result_file_name,
|
454
467
|
statement_params=statement_params
|
455
468
|
)
|
@@ -495,7 +508,7 @@ class EllipticEnvelope(BaseTransformer):
|
|
495
508
|
|
496
509
|
# Register vectorized UDF for batch inference
|
497
510
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
498
|
-
safe_id=self.id, method=inference_method)
|
511
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
499
512
|
|
500
513
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
501
514
|
# will try to pickle all of self which fails.
|
@@ -587,7 +600,7 @@ class EllipticEnvelope(BaseTransformer):
|
|
587
600
|
return transformed_pandas_df.to_dict("records")
|
588
601
|
|
589
602
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
590
|
-
safe_id=self.id
|
603
|
+
safe_id=self._get_rand_id()
|
591
604
|
)
|
592
605
|
|
593
606
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -754,11 +767,18 @@ class EllipticEnvelope(BaseTransformer):
|
|
754
767
|
Transformed dataset.
|
755
768
|
"""
|
756
769
|
if isinstance(dataset, DataFrame):
|
770
|
+
expected_type_inferred = ""
|
771
|
+
# when it is classifier, infer the datatype from label columns
|
772
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
773
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
774
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
775
|
+
)
|
776
|
+
|
757
777
|
output_df = self._batch_inference(
|
758
778
|
dataset=dataset,
|
759
779
|
inference_method="predict",
|
760
780
|
expected_output_cols_list=self.output_cols,
|
761
|
-
expected_output_cols_type=
|
781
|
+
expected_output_cols_type=expected_type_inferred,
|
762
782
|
)
|
763
783
|
elif isinstance(dataset, pd.DataFrame):
|
764
784
|
output_df = self._sklearn_inference(
|
@@ -829,10 +849,10 @@ class EllipticEnvelope(BaseTransformer):
|
|
829
849
|
|
830
850
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
831
851
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
832
|
-
Returns
|
852
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
833
853
|
"""
|
834
854
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
835
|
-
return []
|
855
|
+
return [output_cols_prefix]
|
836
856
|
|
837
857
|
classes = self._sklearn_object.classes_
|
838
858
|
if isinstance(classes, numpy.ndarray):
|
@@ -1059,7 +1079,7 @@ class EllipticEnvelope(BaseTransformer):
|
|
1059
1079
|
cp.dump(self._sklearn_object, local_score_file)
|
1060
1080
|
|
1061
1081
|
# Create temp stage to run score.
|
1062
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1082
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1063
1083
|
session = dataset._session
|
1064
1084
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1065
1085
|
SqlResultValidator(
|
@@ -1073,8 +1093,9 @@ class EllipticEnvelope(BaseTransformer):
|
|
1073
1093
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1074
1094
|
).validate()
|
1075
1095
|
|
1076
|
-
|
1077
|
-
|
1096
|
+
# Use posixpath to construct stage paths
|
1097
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1098
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1078
1099
|
statement_params = telemetry.get_function_usage_statement_params(
|
1079
1100
|
project=_PROJECT,
|
1080
1101
|
subproject=_SUBPROJECT,
|
@@ -1100,6 +1121,7 @@ class EllipticEnvelope(BaseTransformer):
|
|
1100
1121
|
replace=True,
|
1101
1122
|
session=session,
|
1102
1123
|
statement_params=statement_params,
|
1124
|
+
anonymous=True
|
1103
1125
|
)
|
1104
1126
|
def score_wrapper_sproc(
|
1105
1127
|
session: Session,
|
@@ -1107,7 +1129,8 @@ class EllipticEnvelope(BaseTransformer):
|
|
1107
1129
|
stage_score_file_name: str,
|
1108
1130
|
input_cols: List[str],
|
1109
1131
|
label_cols: List[str],
|
1110
|
-
sample_weight_col: Optional[str]
|
1132
|
+
sample_weight_col: Optional[str],
|
1133
|
+
statement_params: Dict[str, str]
|
1111
1134
|
) -> float:
|
1112
1135
|
import cloudpickle as cp
|
1113
1136
|
import numpy as np
|
@@ -1157,14 +1180,14 @@ class EllipticEnvelope(BaseTransformer):
|
|
1157
1180
|
api_calls=[Session.call],
|
1158
1181
|
custom_tags=dict([("autogen", True)]),
|
1159
1182
|
)
|
1160
|
-
score =
|
1161
|
-
|
1183
|
+
score = score_wrapper_sproc(
|
1184
|
+
session,
|
1162
1185
|
query,
|
1163
1186
|
stage_score_file_name,
|
1164
1187
|
identifier.get_unescaped_names(self.input_cols),
|
1165
1188
|
identifier.get_unescaped_names(self.label_cols),
|
1166
1189
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1167
|
-
statement_params
|
1190
|
+
statement_params,
|
1168
1191
|
)
|
1169
1192
|
|
1170
1193
|
cleanup_temp_files([local_score_file_name])
|
@@ -1182,18 +1205,20 @@ class EllipticEnvelope(BaseTransformer):
|
|
1182
1205
|
if self._sklearn_object._estimator_type == 'classifier':
|
1183
1206
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1184
1207
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1185
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1208
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1209
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1186
1210
|
# For regressor, the type of predict is float64
|
1187
1211
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1188
1212
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1189
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1190
|
-
|
1213
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1214
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1191
1215
|
for prob_func in PROB_FUNCTIONS:
|
1192
1216
|
if hasattr(self, prob_func):
|
1193
1217
|
output_cols_prefix: str = f"{prob_func}_"
|
1194
1218
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1195
1219
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1196
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1220
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1221
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1197
1222
|
|
1198
1223
|
@property
|
1199
1224
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -190,7 +192,6 @@ class EmpiricalCovariance(BaseTransformer):
|
|
190
192
|
sample_weight_col: Optional[str] = None,
|
191
193
|
) -> None:
|
192
194
|
super().__init__()
|
193
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
194
195
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
195
196
|
|
196
197
|
self._deps = list(deps)
|
@@ -211,6 +212,15 @@ class EmpiricalCovariance(BaseTransformer):
|
|
211
212
|
self.set_drop_input_cols(drop_input_cols)
|
212
213
|
self.set_sample_weight_col(sample_weight_col)
|
213
214
|
|
215
|
+
def _get_rand_id(self) -> str:
|
216
|
+
"""
|
217
|
+
Generate random id to be used in sproc and stage names.
|
218
|
+
|
219
|
+
Returns:
|
220
|
+
Random id string usable in sproc, table, and stage names.
|
221
|
+
"""
|
222
|
+
return str(uuid4()).replace("-", "_").upper()
|
223
|
+
|
214
224
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
215
225
|
"""
|
216
226
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -289,7 +299,7 @@ class EmpiricalCovariance(BaseTransformer):
|
|
289
299
|
cp.dump(self._sklearn_object, local_transform_file)
|
290
300
|
|
291
301
|
# Create temp stage to run fit.
|
292
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
302
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
293
303
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
294
304
|
SqlResultValidator(
|
295
305
|
session=session,
|
@@ -302,11 +312,12 @@ class EmpiricalCovariance(BaseTransformer):
|
|
302
312
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
303
313
|
).validate()
|
304
314
|
|
305
|
-
|
315
|
+
# Use posixpath to construct stage paths
|
316
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
317
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
306
318
|
local_result_file_name = get_temp_file_path()
|
307
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
308
319
|
|
309
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
320
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
310
321
|
statement_params = telemetry.get_function_usage_statement_params(
|
311
322
|
project=_PROJECT,
|
312
323
|
subproject=_SUBPROJECT,
|
@@ -332,6 +343,7 @@ class EmpiricalCovariance(BaseTransformer):
|
|
332
343
|
replace=True,
|
333
344
|
session=session,
|
334
345
|
statement_params=statement_params,
|
346
|
+
anonymous=True
|
335
347
|
)
|
336
348
|
def fit_wrapper_sproc(
|
337
349
|
session: Session,
|
@@ -340,7 +352,8 @@ class EmpiricalCovariance(BaseTransformer):
|
|
340
352
|
stage_result_file_name: str,
|
341
353
|
input_cols: List[str],
|
342
354
|
label_cols: List[str],
|
343
|
-
sample_weight_col: Optional[str]
|
355
|
+
sample_weight_col: Optional[str],
|
356
|
+
statement_params: Dict[str, str]
|
344
357
|
) -> str:
|
345
358
|
import cloudpickle as cp
|
346
359
|
import numpy as np
|
@@ -407,15 +420,15 @@ class EmpiricalCovariance(BaseTransformer):
|
|
407
420
|
api_calls=[Session.call],
|
408
421
|
custom_tags=dict([("autogen", True)]),
|
409
422
|
)
|
410
|
-
sproc_export_file_name =
|
411
|
-
|
423
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
424
|
+
session,
|
412
425
|
query,
|
413
426
|
stage_transform_file_name,
|
414
427
|
stage_result_file_name,
|
415
428
|
identifier.get_unescaped_names(self.input_cols),
|
416
429
|
identifier.get_unescaped_names(self.label_cols),
|
417
430
|
identifier.get_unescaped_names(self.sample_weight_col),
|
418
|
-
statement_params
|
431
|
+
statement_params,
|
419
432
|
)
|
420
433
|
|
421
434
|
if "|" in sproc_export_file_name:
|
@@ -425,7 +438,7 @@ class EmpiricalCovariance(BaseTransformer):
|
|
425
438
|
print("\n".join(fields[1:]))
|
426
439
|
|
427
440
|
session.file.get(
|
428
|
-
|
441
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
429
442
|
local_result_file_name,
|
430
443
|
statement_params=statement_params
|
431
444
|
)
|
@@ -471,7 +484,7 @@ class EmpiricalCovariance(BaseTransformer):
|
|
471
484
|
|
472
485
|
# Register vectorized UDF for batch inference
|
473
486
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
474
|
-
safe_id=self.id, method=inference_method)
|
487
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
475
488
|
|
476
489
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
477
490
|
# will try to pickle all of self which fails.
|
@@ -563,7 +576,7 @@ class EmpiricalCovariance(BaseTransformer):
|
|
563
576
|
return transformed_pandas_df.to_dict("records")
|
564
577
|
|
565
578
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
566
|
-
safe_id=self.id
|
579
|
+
safe_id=self._get_rand_id()
|
567
580
|
)
|
568
581
|
|
569
582
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -728,11 +741,18 @@ class EmpiricalCovariance(BaseTransformer):
|
|
728
741
|
Transformed dataset.
|
729
742
|
"""
|
730
743
|
if isinstance(dataset, DataFrame):
|
744
|
+
expected_type_inferred = ""
|
745
|
+
# when it is classifier, infer the datatype from label columns
|
746
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
747
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
748
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
749
|
+
)
|
750
|
+
|
731
751
|
output_df = self._batch_inference(
|
732
752
|
dataset=dataset,
|
733
753
|
inference_method="predict",
|
734
754
|
expected_output_cols_list=self.output_cols,
|
735
|
-
expected_output_cols_type=
|
755
|
+
expected_output_cols_type=expected_type_inferred,
|
736
756
|
)
|
737
757
|
elif isinstance(dataset, pd.DataFrame):
|
738
758
|
output_df = self._sklearn_inference(
|
@@ -803,10 +823,10 @@ class EmpiricalCovariance(BaseTransformer):
|
|
803
823
|
|
804
824
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
805
825
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
806
|
-
Returns
|
826
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
807
827
|
"""
|
808
828
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
809
|
-
return []
|
829
|
+
return [output_cols_prefix]
|
810
830
|
|
811
831
|
classes = self._sklearn_object.classes_
|
812
832
|
if isinstance(classes, numpy.ndarray):
|
@@ -1031,7 +1051,7 @@ class EmpiricalCovariance(BaseTransformer):
|
|
1031
1051
|
cp.dump(self._sklearn_object, local_score_file)
|
1032
1052
|
|
1033
1053
|
# Create temp stage to run score.
|
1034
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1054
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1035
1055
|
session = dataset._session
|
1036
1056
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1037
1057
|
SqlResultValidator(
|
@@ -1045,8 +1065,9 @@ class EmpiricalCovariance(BaseTransformer):
|
|
1045
1065
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1046
1066
|
).validate()
|
1047
1067
|
|
1048
|
-
|
1049
|
-
|
1068
|
+
# Use posixpath to construct stage paths
|
1069
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1070
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1050
1071
|
statement_params = telemetry.get_function_usage_statement_params(
|
1051
1072
|
project=_PROJECT,
|
1052
1073
|
subproject=_SUBPROJECT,
|
@@ -1072,6 +1093,7 @@ class EmpiricalCovariance(BaseTransformer):
|
|
1072
1093
|
replace=True,
|
1073
1094
|
session=session,
|
1074
1095
|
statement_params=statement_params,
|
1096
|
+
anonymous=True
|
1075
1097
|
)
|
1076
1098
|
def score_wrapper_sproc(
|
1077
1099
|
session: Session,
|
@@ -1079,7 +1101,8 @@ class EmpiricalCovariance(BaseTransformer):
|
|
1079
1101
|
stage_score_file_name: str,
|
1080
1102
|
input_cols: List[str],
|
1081
1103
|
label_cols: List[str],
|
1082
|
-
sample_weight_col: Optional[str]
|
1104
|
+
sample_weight_col: Optional[str],
|
1105
|
+
statement_params: Dict[str, str]
|
1083
1106
|
) -> float:
|
1084
1107
|
import cloudpickle as cp
|
1085
1108
|
import numpy as np
|
@@ -1129,14 +1152,14 @@ class EmpiricalCovariance(BaseTransformer):
|
|
1129
1152
|
api_calls=[Session.call],
|
1130
1153
|
custom_tags=dict([("autogen", True)]),
|
1131
1154
|
)
|
1132
|
-
score =
|
1133
|
-
|
1155
|
+
score = score_wrapper_sproc(
|
1156
|
+
session,
|
1134
1157
|
query,
|
1135
1158
|
stage_score_file_name,
|
1136
1159
|
identifier.get_unescaped_names(self.input_cols),
|
1137
1160
|
identifier.get_unescaped_names(self.label_cols),
|
1138
1161
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1139
|
-
statement_params
|
1162
|
+
statement_params,
|
1140
1163
|
)
|
1141
1164
|
|
1142
1165
|
cleanup_temp_files([local_score_file_name])
|
@@ -1154,18 +1177,20 @@ class EmpiricalCovariance(BaseTransformer):
|
|
1154
1177
|
if self._sklearn_object._estimator_type == 'classifier':
|
1155
1178
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1156
1179
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1157
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1180
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1181
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1158
1182
|
# For regressor, the type of predict is float64
|
1159
1183
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1160
1184
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1161
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1162
|
-
|
1185
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1186
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1163
1187
|
for prob_func in PROB_FUNCTIONS:
|
1164
1188
|
if hasattr(self, prob_func):
|
1165
1189
|
output_cols_prefix: str = f"{prob_func}_"
|
1166
1190
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1167
1191
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1168
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1192
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1193
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1169
1194
|
|
1170
1195
|
@property
|
1171
1196
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|