snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -190,7 +192,6 @@ class OAS(BaseTransformer):
|
|
190
192
|
sample_weight_col: Optional[str] = None,
|
191
193
|
) -> None:
|
192
194
|
super().__init__()
|
193
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
194
195
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
195
196
|
|
196
197
|
self._deps = list(deps)
|
@@ -211,6 +212,15 @@ class OAS(BaseTransformer):
|
|
211
212
|
self.set_drop_input_cols(drop_input_cols)
|
212
213
|
self.set_sample_weight_col(sample_weight_col)
|
213
214
|
|
215
|
+
def _get_rand_id(self) -> str:
|
216
|
+
"""
|
217
|
+
Generate random id to be used in sproc and stage names.
|
218
|
+
|
219
|
+
Returns:
|
220
|
+
Random id string usable in sproc, table, and stage names.
|
221
|
+
"""
|
222
|
+
return str(uuid4()).replace("-", "_").upper()
|
223
|
+
|
214
224
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
215
225
|
"""
|
216
226
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -289,7 +299,7 @@ class OAS(BaseTransformer):
|
|
289
299
|
cp.dump(self._sklearn_object, local_transform_file)
|
290
300
|
|
291
301
|
# Create temp stage to run fit.
|
292
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
302
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
293
303
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
294
304
|
SqlResultValidator(
|
295
305
|
session=session,
|
@@ -302,11 +312,12 @@ class OAS(BaseTransformer):
|
|
302
312
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
303
313
|
).validate()
|
304
314
|
|
305
|
-
|
315
|
+
# Use posixpath to construct stage paths
|
316
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
317
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
306
318
|
local_result_file_name = get_temp_file_path()
|
307
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
308
319
|
|
309
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
320
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
310
321
|
statement_params = telemetry.get_function_usage_statement_params(
|
311
322
|
project=_PROJECT,
|
312
323
|
subproject=_SUBPROJECT,
|
@@ -332,6 +343,7 @@ class OAS(BaseTransformer):
|
|
332
343
|
replace=True,
|
333
344
|
session=session,
|
334
345
|
statement_params=statement_params,
|
346
|
+
anonymous=True
|
335
347
|
)
|
336
348
|
def fit_wrapper_sproc(
|
337
349
|
session: Session,
|
@@ -340,7 +352,8 @@ class OAS(BaseTransformer):
|
|
340
352
|
stage_result_file_name: str,
|
341
353
|
input_cols: List[str],
|
342
354
|
label_cols: List[str],
|
343
|
-
sample_weight_col: Optional[str]
|
355
|
+
sample_weight_col: Optional[str],
|
356
|
+
statement_params: Dict[str, str]
|
344
357
|
) -> str:
|
345
358
|
import cloudpickle as cp
|
346
359
|
import numpy as np
|
@@ -407,15 +420,15 @@ class OAS(BaseTransformer):
|
|
407
420
|
api_calls=[Session.call],
|
408
421
|
custom_tags=dict([("autogen", True)]),
|
409
422
|
)
|
410
|
-
sproc_export_file_name =
|
411
|
-
|
423
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
424
|
+
session,
|
412
425
|
query,
|
413
426
|
stage_transform_file_name,
|
414
427
|
stage_result_file_name,
|
415
428
|
identifier.get_unescaped_names(self.input_cols),
|
416
429
|
identifier.get_unescaped_names(self.label_cols),
|
417
430
|
identifier.get_unescaped_names(self.sample_weight_col),
|
418
|
-
statement_params
|
431
|
+
statement_params,
|
419
432
|
)
|
420
433
|
|
421
434
|
if "|" in sproc_export_file_name:
|
@@ -425,7 +438,7 @@ class OAS(BaseTransformer):
|
|
425
438
|
print("\n".join(fields[1:]))
|
426
439
|
|
427
440
|
session.file.get(
|
428
|
-
|
441
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
429
442
|
local_result_file_name,
|
430
443
|
statement_params=statement_params
|
431
444
|
)
|
@@ -471,7 +484,7 @@ class OAS(BaseTransformer):
|
|
471
484
|
|
472
485
|
# Register vectorized UDF for batch inference
|
473
486
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
474
|
-
safe_id=self.id, method=inference_method)
|
487
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
475
488
|
|
476
489
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
477
490
|
# will try to pickle all of self which fails.
|
@@ -563,7 +576,7 @@ class OAS(BaseTransformer):
|
|
563
576
|
return transformed_pandas_df.to_dict("records")
|
564
577
|
|
565
578
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
566
|
-
safe_id=self.id
|
579
|
+
safe_id=self._get_rand_id()
|
567
580
|
)
|
568
581
|
|
569
582
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -728,11 +741,18 @@ class OAS(BaseTransformer):
|
|
728
741
|
Transformed dataset.
|
729
742
|
"""
|
730
743
|
if isinstance(dataset, DataFrame):
|
744
|
+
expected_type_inferred = ""
|
745
|
+
# when it is classifier, infer the datatype from label columns
|
746
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
747
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
748
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
749
|
+
)
|
750
|
+
|
731
751
|
output_df = self._batch_inference(
|
732
752
|
dataset=dataset,
|
733
753
|
inference_method="predict",
|
734
754
|
expected_output_cols_list=self.output_cols,
|
735
|
-
expected_output_cols_type=
|
755
|
+
expected_output_cols_type=expected_type_inferred,
|
736
756
|
)
|
737
757
|
elif isinstance(dataset, pd.DataFrame):
|
738
758
|
output_df = self._sklearn_inference(
|
@@ -803,10 +823,10 @@ class OAS(BaseTransformer):
|
|
803
823
|
|
804
824
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
805
825
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
806
|
-
Returns
|
826
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
807
827
|
"""
|
808
828
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
809
|
-
return []
|
829
|
+
return [output_cols_prefix]
|
810
830
|
|
811
831
|
classes = self._sklearn_object.classes_
|
812
832
|
if isinstance(classes, numpy.ndarray):
|
@@ -1031,7 +1051,7 @@ class OAS(BaseTransformer):
|
|
1031
1051
|
cp.dump(self._sklearn_object, local_score_file)
|
1032
1052
|
|
1033
1053
|
# Create temp stage to run score.
|
1034
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1054
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1035
1055
|
session = dataset._session
|
1036
1056
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1037
1057
|
SqlResultValidator(
|
@@ -1045,8 +1065,9 @@ class OAS(BaseTransformer):
|
|
1045
1065
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1046
1066
|
).validate()
|
1047
1067
|
|
1048
|
-
|
1049
|
-
|
1068
|
+
# Use posixpath to construct stage paths
|
1069
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1070
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1050
1071
|
statement_params = telemetry.get_function_usage_statement_params(
|
1051
1072
|
project=_PROJECT,
|
1052
1073
|
subproject=_SUBPROJECT,
|
@@ -1072,6 +1093,7 @@ class OAS(BaseTransformer):
|
|
1072
1093
|
replace=True,
|
1073
1094
|
session=session,
|
1074
1095
|
statement_params=statement_params,
|
1096
|
+
anonymous=True
|
1075
1097
|
)
|
1076
1098
|
def score_wrapper_sproc(
|
1077
1099
|
session: Session,
|
@@ -1079,7 +1101,8 @@ class OAS(BaseTransformer):
|
|
1079
1101
|
stage_score_file_name: str,
|
1080
1102
|
input_cols: List[str],
|
1081
1103
|
label_cols: List[str],
|
1082
|
-
sample_weight_col: Optional[str]
|
1104
|
+
sample_weight_col: Optional[str],
|
1105
|
+
statement_params: Dict[str, str]
|
1083
1106
|
) -> float:
|
1084
1107
|
import cloudpickle as cp
|
1085
1108
|
import numpy as np
|
@@ -1129,14 +1152,14 @@ class OAS(BaseTransformer):
|
|
1129
1152
|
api_calls=[Session.call],
|
1130
1153
|
custom_tags=dict([("autogen", True)]),
|
1131
1154
|
)
|
1132
|
-
score =
|
1133
|
-
|
1155
|
+
score = score_wrapper_sproc(
|
1156
|
+
session,
|
1134
1157
|
query,
|
1135
1158
|
stage_score_file_name,
|
1136
1159
|
identifier.get_unescaped_names(self.input_cols),
|
1137
1160
|
identifier.get_unescaped_names(self.label_cols),
|
1138
1161
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1139
|
-
statement_params
|
1162
|
+
statement_params,
|
1140
1163
|
)
|
1141
1164
|
|
1142
1165
|
cleanup_temp_files([local_score_file_name])
|
@@ -1154,18 +1177,20 @@ class OAS(BaseTransformer):
|
|
1154
1177
|
if self._sklearn_object._estimator_type == 'classifier':
|
1155
1178
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1156
1179
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1157
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1180
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1181
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1158
1182
|
# For regressor, the type of predict is float64
|
1159
1183
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1160
1184
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1161
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1162
|
-
|
1185
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1186
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1163
1187
|
for prob_func in PROB_FUNCTIONS:
|
1164
1188
|
if hasattr(self, prob_func):
|
1165
1189
|
output_cols_prefix: str = f"{prob_func}_"
|
1166
1190
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1167
1191
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1168
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1192
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1193
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1169
1194
|
|
1170
1195
|
@property
|
1171
1196
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -195,7 +197,6 @@ class ShrunkCovariance(BaseTransformer):
|
|
195
197
|
sample_weight_col: Optional[str] = None,
|
196
198
|
) -> None:
|
197
199
|
super().__init__()
|
198
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
199
200
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
200
201
|
|
201
202
|
self._deps = list(deps)
|
@@ -217,6 +218,15 @@ class ShrunkCovariance(BaseTransformer):
|
|
217
218
|
self.set_drop_input_cols(drop_input_cols)
|
218
219
|
self.set_sample_weight_col(sample_weight_col)
|
219
220
|
|
221
|
+
def _get_rand_id(self) -> str:
|
222
|
+
"""
|
223
|
+
Generate random id to be used in sproc and stage names.
|
224
|
+
|
225
|
+
Returns:
|
226
|
+
Random id string usable in sproc, table, and stage names.
|
227
|
+
"""
|
228
|
+
return str(uuid4()).replace("-", "_").upper()
|
229
|
+
|
220
230
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
221
231
|
"""
|
222
232
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -295,7 +305,7 @@ class ShrunkCovariance(BaseTransformer):
|
|
295
305
|
cp.dump(self._sklearn_object, local_transform_file)
|
296
306
|
|
297
307
|
# Create temp stage to run fit.
|
298
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
308
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
299
309
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
300
310
|
SqlResultValidator(
|
301
311
|
session=session,
|
@@ -308,11 +318,12 @@ class ShrunkCovariance(BaseTransformer):
|
|
308
318
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
309
319
|
).validate()
|
310
320
|
|
311
|
-
|
321
|
+
# Use posixpath to construct stage paths
|
322
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
323
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
312
324
|
local_result_file_name = get_temp_file_path()
|
313
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
314
325
|
|
315
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
326
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
316
327
|
statement_params = telemetry.get_function_usage_statement_params(
|
317
328
|
project=_PROJECT,
|
318
329
|
subproject=_SUBPROJECT,
|
@@ -338,6 +349,7 @@ class ShrunkCovariance(BaseTransformer):
|
|
338
349
|
replace=True,
|
339
350
|
session=session,
|
340
351
|
statement_params=statement_params,
|
352
|
+
anonymous=True
|
341
353
|
)
|
342
354
|
def fit_wrapper_sproc(
|
343
355
|
session: Session,
|
@@ -346,7 +358,8 @@ class ShrunkCovariance(BaseTransformer):
|
|
346
358
|
stage_result_file_name: str,
|
347
359
|
input_cols: List[str],
|
348
360
|
label_cols: List[str],
|
349
|
-
sample_weight_col: Optional[str]
|
361
|
+
sample_weight_col: Optional[str],
|
362
|
+
statement_params: Dict[str, str]
|
350
363
|
) -> str:
|
351
364
|
import cloudpickle as cp
|
352
365
|
import numpy as np
|
@@ -413,15 +426,15 @@ class ShrunkCovariance(BaseTransformer):
|
|
413
426
|
api_calls=[Session.call],
|
414
427
|
custom_tags=dict([("autogen", True)]),
|
415
428
|
)
|
416
|
-
sproc_export_file_name =
|
417
|
-
|
429
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
430
|
+
session,
|
418
431
|
query,
|
419
432
|
stage_transform_file_name,
|
420
433
|
stage_result_file_name,
|
421
434
|
identifier.get_unescaped_names(self.input_cols),
|
422
435
|
identifier.get_unescaped_names(self.label_cols),
|
423
436
|
identifier.get_unescaped_names(self.sample_weight_col),
|
424
|
-
statement_params
|
437
|
+
statement_params,
|
425
438
|
)
|
426
439
|
|
427
440
|
if "|" in sproc_export_file_name:
|
@@ -431,7 +444,7 @@ class ShrunkCovariance(BaseTransformer):
|
|
431
444
|
print("\n".join(fields[1:]))
|
432
445
|
|
433
446
|
session.file.get(
|
434
|
-
|
447
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
435
448
|
local_result_file_name,
|
436
449
|
statement_params=statement_params
|
437
450
|
)
|
@@ -477,7 +490,7 @@ class ShrunkCovariance(BaseTransformer):
|
|
477
490
|
|
478
491
|
# Register vectorized UDF for batch inference
|
479
492
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
480
|
-
safe_id=self.id, method=inference_method)
|
493
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
481
494
|
|
482
495
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
483
496
|
# will try to pickle all of self which fails.
|
@@ -569,7 +582,7 @@ class ShrunkCovariance(BaseTransformer):
|
|
569
582
|
return transformed_pandas_df.to_dict("records")
|
570
583
|
|
571
584
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
572
|
-
safe_id=self.id
|
585
|
+
safe_id=self._get_rand_id()
|
573
586
|
)
|
574
587
|
|
575
588
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -734,11 +747,18 @@ class ShrunkCovariance(BaseTransformer):
|
|
734
747
|
Transformed dataset.
|
735
748
|
"""
|
736
749
|
if isinstance(dataset, DataFrame):
|
750
|
+
expected_type_inferred = ""
|
751
|
+
# when it is classifier, infer the datatype from label columns
|
752
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
753
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
754
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
755
|
+
)
|
756
|
+
|
737
757
|
output_df = self._batch_inference(
|
738
758
|
dataset=dataset,
|
739
759
|
inference_method="predict",
|
740
760
|
expected_output_cols_list=self.output_cols,
|
741
|
-
expected_output_cols_type=
|
761
|
+
expected_output_cols_type=expected_type_inferred,
|
742
762
|
)
|
743
763
|
elif isinstance(dataset, pd.DataFrame):
|
744
764
|
output_df = self._sklearn_inference(
|
@@ -809,10 +829,10 @@ class ShrunkCovariance(BaseTransformer):
|
|
809
829
|
|
810
830
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
811
831
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
812
|
-
Returns
|
832
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
813
833
|
"""
|
814
834
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
815
|
-
return []
|
835
|
+
return [output_cols_prefix]
|
816
836
|
|
817
837
|
classes = self._sklearn_object.classes_
|
818
838
|
if isinstance(classes, numpy.ndarray):
|
@@ -1037,7 +1057,7 @@ class ShrunkCovariance(BaseTransformer):
|
|
1037
1057
|
cp.dump(self._sklearn_object, local_score_file)
|
1038
1058
|
|
1039
1059
|
# Create temp stage to run score.
|
1040
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1060
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1041
1061
|
session = dataset._session
|
1042
1062
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1043
1063
|
SqlResultValidator(
|
@@ -1051,8 +1071,9 @@ class ShrunkCovariance(BaseTransformer):
|
|
1051
1071
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1052
1072
|
).validate()
|
1053
1073
|
|
1054
|
-
|
1055
|
-
|
1074
|
+
# Use posixpath to construct stage paths
|
1075
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1076
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1056
1077
|
statement_params = telemetry.get_function_usage_statement_params(
|
1057
1078
|
project=_PROJECT,
|
1058
1079
|
subproject=_SUBPROJECT,
|
@@ -1078,6 +1099,7 @@ class ShrunkCovariance(BaseTransformer):
|
|
1078
1099
|
replace=True,
|
1079
1100
|
session=session,
|
1080
1101
|
statement_params=statement_params,
|
1102
|
+
anonymous=True
|
1081
1103
|
)
|
1082
1104
|
def score_wrapper_sproc(
|
1083
1105
|
session: Session,
|
@@ -1085,7 +1107,8 @@ class ShrunkCovariance(BaseTransformer):
|
|
1085
1107
|
stage_score_file_name: str,
|
1086
1108
|
input_cols: List[str],
|
1087
1109
|
label_cols: List[str],
|
1088
|
-
sample_weight_col: Optional[str]
|
1110
|
+
sample_weight_col: Optional[str],
|
1111
|
+
statement_params: Dict[str, str]
|
1089
1112
|
) -> float:
|
1090
1113
|
import cloudpickle as cp
|
1091
1114
|
import numpy as np
|
@@ -1135,14 +1158,14 @@ class ShrunkCovariance(BaseTransformer):
|
|
1135
1158
|
api_calls=[Session.call],
|
1136
1159
|
custom_tags=dict([("autogen", True)]),
|
1137
1160
|
)
|
1138
|
-
score =
|
1139
|
-
|
1161
|
+
score = score_wrapper_sproc(
|
1162
|
+
session,
|
1140
1163
|
query,
|
1141
1164
|
stage_score_file_name,
|
1142
1165
|
identifier.get_unescaped_names(self.input_cols),
|
1143
1166
|
identifier.get_unescaped_names(self.label_cols),
|
1144
1167
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1145
|
-
statement_params
|
1168
|
+
statement_params,
|
1146
1169
|
)
|
1147
1170
|
|
1148
1171
|
cleanup_temp_files([local_score_file_name])
|
@@ -1160,18 +1183,20 @@ class ShrunkCovariance(BaseTransformer):
|
|
1160
1183
|
if self._sklearn_object._estimator_type == 'classifier':
|
1161
1184
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1162
1185
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1163
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1186
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1187
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1164
1188
|
# For regressor, the type of predict is float64
|
1165
1189
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1166
1190
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1167
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1168
|
-
|
1191
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1192
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1169
1193
|
for prob_func in PROB_FUNCTIONS:
|
1170
1194
|
if hasattr(self, prob_func):
|
1171
1195
|
output_cols_prefix: str = f"{prob_func}_"
|
1172
1196
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1173
1197
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1174
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1198
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1199
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1175
1200
|
|
1176
1201
|
@property
|
1177
1202
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|