snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -189,7 +191,6 @@ class GaussianNB(BaseTransformer):
|
|
189
191
|
sample_weight_col: Optional[str] = None,
|
190
192
|
) -> None:
|
191
193
|
super().__init__()
|
192
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
193
194
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
194
195
|
|
195
196
|
self._deps = list(deps)
|
@@ -210,6 +211,15 @@ class GaussianNB(BaseTransformer):
|
|
210
211
|
self.set_drop_input_cols(drop_input_cols)
|
211
212
|
self.set_sample_weight_col(sample_weight_col)
|
212
213
|
|
214
|
+
def _get_rand_id(self) -> str:
|
215
|
+
"""
|
216
|
+
Generate random id to be used in sproc and stage names.
|
217
|
+
|
218
|
+
Returns:
|
219
|
+
Random id string usable in sproc, table, and stage names.
|
220
|
+
"""
|
221
|
+
return str(uuid4()).replace("-", "_").upper()
|
222
|
+
|
213
223
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
214
224
|
"""
|
215
225
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -288,7 +298,7 @@ class GaussianNB(BaseTransformer):
|
|
288
298
|
cp.dump(self._sklearn_object, local_transform_file)
|
289
299
|
|
290
300
|
# Create temp stage to run fit.
|
291
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
301
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
292
302
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
293
303
|
SqlResultValidator(
|
294
304
|
session=session,
|
@@ -301,11 +311,12 @@ class GaussianNB(BaseTransformer):
|
|
301
311
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
302
312
|
).validate()
|
303
313
|
|
304
|
-
|
314
|
+
# Use posixpath to construct stage paths
|
315
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
316
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
305
317
|
local_result_file_name = get_temp_file_path()
|
306
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
307
318
|
|
308
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
319
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
309
320
|
statement_params = telemetry.get_function_usage_statement_params(
|
310
321
|
project=_PROJECT,
|
311
322
|
subproject=_SUBPROJECT,
|
@@ -331,6 +342,7 @@ class GaussianNB(BaseTransformer):
|
|
331
342
|
replace=True,
|
332
343
|
session=session,
|
333
344
|
statement_params=statement_params,
|
345
|
+
anonymous=True
|
334
346
|
)
|
335
347
|
def fit_wrapper_sproc(
|
336
348
|
session: Session,
|
@@ -339,7 +351,8 @@ class GaussianNB(BaseTransformer):
|
|
339
351
|
stage_result_file_name: str,
|
340
352
|
input_cols: List[str],
|
341
353
|
label_cols: List[str],
|
342
|
-
sample_weight_col: Optional[str]
|
354
|
+
sample_weight_col: Optional[str],
|
355
|
+
statement_params: Dict[str, str]
|
343
356
|
) -> str:
|
344
357
|
import cloudpickle as cp
|
345
358
|
import numpy as np
|
@@ -406,15 +419,15 @@ class GaussianNB(BaseTransformer):
|
|
406
419
|
api_calls=[Session.call],
|
407
420
|
custom_tags=dict([("autogen", True)]),
|
408
421
|
)
|
409
|
-
sproc_export_file_name =
|
410
|
-
|
422
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
423
|
+
session,
|
411
424
|
query,
|
412
425
|
stage_transform_file_name,
|
413
426
|
stage_result_file_name,
|
414
427
|
identifier.get_unescaped_names(self.input_cols),
|
415
428
|
identifier.get_unescaped_names(self.label_cols),
|
416
429
|
identifier.get_unescaped_names(self.sample_weight_col),
|
417
|
-
statement_params
|
430
|
+
statement_params,
|
418
431
|
)
|
419
432
|
|
420
433
|
if "|" in sproc_export_file_name:
|
@@ -424,7 +437,7 @@ class GaussianNB(BaseTransformer):
|
|
424
437
|
print("\n".join(fields[1:]))
|
425
438
|
|
426
439
|
session.file.get(
|
427
|
-
|
440
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
428
441
|
local_result_file_name,
|
429
442
|
statement_params=statement_params
|
430
443
|
)
|
@@ -470,7 +483,7 @@ class GaussianNB(BaseTransformer):
|
|
470
483
|
|
471
484
|
# Register vectorized UDF for batch inference
|
472
485
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
473
|
-
safe_id=self.
|
486
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
474
487
|
|
475
488
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
476
489
|
# will try to pickle all of self which fails.
|
@@ -562,7 +575,7 @@ class GaussianNB(BaseTransformer):
|
|
562
575
|
return transformed_pandas_df.to_dict("records")
|
563
576
|
|
564
577
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
565
|
-
safe_id=self.
|
578
|
+
safe_id=self._get_rand_id()
|
566
579
|
)
|
567
580
|
|
568
581
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -729,11 +742,18 @@ class GaussianNB(BaseTransformer):
|
|
729
742
|
Transformed dataset.
|
730
743
|
"""
|
731
744
|
if isinstance(dataset, DataFrame):
|
745
|
+
expected_type_inferred = ""
|
746
|
+
# when it is classifier, infer the datatype from label columns
|
747
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
748
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
749
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
750
|
+
)
|
751
|
+
|
732
752
|
output_df = self._batch_inference(
|
733
753
|
dataset=dataset,
|
734
754
|
inference_method="predict",
|
735
755
|
expected_output_cols_list=self.output_cols,
|
736
|
-
expected_output_cols_type=
|
756
|
+
expected_output_cols_type=expected_type_inferred,
|
737
757
|
)
|
738
758
|
elif isinstance(dataset, pd.DataFrame):
|
739
759
|
output_df = self._sklearn_inference(
|
@@ -804,10 +824,10 @@ class GaussianNB(BaseTransformer):
|
|
804
824
|
|
805
825
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
806
826
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
807
|
-
Returns
|
827
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
808
828
|
"""
|
809
829
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
810
|
-
return []
|
830
|
+
return [output_cols_prefix]
|
811
831
|
|
812
832
|
classes = self._sklearn_object.classes_
|
813
833
|
if isinstance(classes, numpy.ndarray):
|
@@ -1036,7 +1056,7 @@ class GaussianNB(BaseTransformer):
|
|
1036
1056
|
cp.dump(self._sklearn_object, local_score_file)
|
1037
1057
|
|
1038
1058
|
# Create temp stage to run score.
|
1039
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1059
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1040
1060
|
session = dataset._session
|
1041
1061
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1042
1062
|
SqlResultValidator(
|
@@ -1050,8 +1070,9 @@ class GaussianNB(BaseTransformer):
|
|
1050
1070
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1051
1071
|
).validate()
|
1052
1072
|
|
1053
|
-
|
1054
|
-
|
1073
|
+
# Use posixpath to construct stage paths
|
1074
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1075
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1055
1076
|
statement_params = telemetry.get_function_usage_statement_params(
|
1056
1077
|
project=_PROJECT,
|
1057
1078
|
subproject=_SUBPROJECT,
|
@@ -1077,6 +1098,7 @@ class GaussianNB(BaseTransformer):
|
|
1077
1098
|
replace=True,
|
1078
1099
|
session=session,
|
1079
1100
|
statement_params=statement_params,
|
1101
|
+
anonymous=True
|
1080
1102
|
)
|
1081
1103
|
def score_wrapper_sproc(
|
1082
1104
|
session: Session,
|
@@ -1084,7 +1106,8 @@ class GaussianNB(BaseTransformer):
|
|
1084
1106
|
stage_score_file_name: str,
|
1085
1107
|
input_cols: List[str],
|
1086
1108
|
label_cols: List[str],
|
1087
|
-
sample_weight_col: Optional[str]
|
1109
|
+
sample_weight_col: Optional[str],
|
1110
|
+
statement_params: Dict[str, str]
|
1088
1111
|
) -> float:
|
1089
1112
|
import cloudpickle as cp
|
1090
1113
|
import numpy as np
|
@@ -1134,14 +1157,14 @@ class GaussianNB(BaseTransformer):
|
|
1134
1157
|
api_calls=[Session.call],
|
1135
1158
|
custom_tags=dict([("autogen", True)]),
|
1136
1159
|
)
|
1137
|
-
score =
|
1138
|
-
|
1160
|
+
score = score_wrapper_sproc(
|
1161
|
+
session,
|
1139
1162
|
query,
|
1140
1163
|
stage_score_file_name,
|
1141
1164
|
identifier.get_unescaped_names(self.input_cols),
|
1142
1165
|
identifier.get_unescaped_names(self.label_cols),
|
1143
1166
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1144
|
-
statement_params
|
1167
|
+
statement_params,
|
1145
1168
|
)
|
1146
1169
|
|
1147
1170
|
cleanup_temp_files([local_score_file_name])
|
@@ -1159,18 +1182,20 @@ class GaussianNB(BaseTransformer):
|
|
1159
1182
|
if self._sklearn_object._estimator_type == 'classifier':
|
1160
1183
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1161
1184
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1162
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1185
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1186
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1163
1187
|
# For regressor, the type of predict is float64
|
1164
1188
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1165
1189
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1166
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1167
|
-
|
1190
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1191
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1168
1192
|
for prob_func in PROB_FUNCTIONS:
|
1169
1193
|
if hasattr(self, prob_func):
|
1170
1194
|
output_cols_prefix: str = f"{prob_func}_"
|
1171
1195
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1172
1196
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1173
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1197
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1198
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1174
1199
|
|
1175
1200
|
@property
|
1176
1201
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -200,7 +202,6 @@ class MultinomialNB(BaseTransformer):
|
|
200
202
|
sample_weight_col: Optional[str] = None,
|
201
203
|
) -> None:
|
202
204
|
super().__init__()
|
203
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
204
205
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
205
206
|
|
206
207
|
self._deps = list(deps)
|
@@ -223,6 +224,15 @@ class MultinomialNB(BaseTransformer):
|
|
223
224
|
self.set_drop_input_cols(drop_input_cols)
|
224
225
|
self.set_sample_weight_col(sample_weight_col)
|
225
226
|
|
227
|
+
def _get_rand_id(self) -> str:
|
228
|
+
"""
|
229
|
+
Generate random id to be used in sproc and stage names.
|
230
|
+
|
231
|
+
Returns:
|
232
|
+
Random id string usable in sproc, table, and stage names.
|
233
|
+
"""
|
234
|
+
return str(uuid4()).replace("-", "_").upper()
|
235
|
+
|
226
236
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
227
237
|
"""
|
228
238
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -301,7 +311,7 @@ class MultinomialNB(BaseTransformer):
|
|
301
311
|
cp.dump(self._sklearn_object, local_transform_file)
|
302
312
|
|
303
313
|
# Create temp stage to run fit.
|
304
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
314
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
305
315
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
306
316
|
SqlResultValidator(
|
307
317
|
session=session,
|
@@ -314,11 +324,12 @@ class MultinomialNB(BaseTransformer):
|
|
314
324
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
315
325
|
).validate()
|
316
326
|
|
317
|
-
|
327
|
+
# Use posixpath to construct stage paths
|
328
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
329
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
318
330
|
local_result_file_name = get_temp_file_path()
|
319
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
320
331
|
|
321
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
332
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
322
333
|
statement_params = telemetry.get_function_usage_statement_params(
|
323
334
|
project=_PROJECT,
|
324
335
|
subproject=_SUBPROJECT,
|
@@ -344,6 +355,7 @@ class MultinomialNB(BaseTransformer):
|
|
344
355
|
replace=True,
|
345
356
|
session=session,
|
346
357
|
statement_params=statement_params,
|
358
|
+
anonymous=True
|
347
359
|
)
|
348
360
|
def fit_wrapper_sproc(
|
349
361
|
session: Session,
|
@@ -352,7 +364,8 @@ class MultinomialNB(BaseTransformer):
|
|
352
364
|
stage_result_file_name: str,
|
353
365
|
input_cols: List[str],
|
354
366
|
label_cols: List[str],
|
355
|
-
sample_weight_col: Optional[str]
|
367
|
+
sample_weight_col: Optional[str],
|
368
|
+
statement_params: Dict[str, str]
|
356
369
|
) -> str:
|
357
370
|
import cloudpickle as cp
|
358
371
|
import numpy as np
|
@@ -419,15 +432,15 @@ class MultinomialNB(BaseTransformer):
|
|
419
432
|
api_calls=[Session.call],
|
420
433
|
custom_tags=dict([("autogen", True)]),
|
421
434
|
)
|
422
|
-
sproc_export_file_name =
|
423
|
-
|
435
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
436
|
+
session,
|
424
437
|
query,
|
425
438
|
stage_transform_file_name,
|
426
439
|
stage_result_file_name,
|
427
440
|
identifier.get_unescaped_names(self.input_cols),
|
428
441
|
identifier.get_unescaped_names(self.label_cols),
|
429
442
|
identifier.get_unescaped_names(self.sample_weight_col),
|
430
|
-
statement_params
|
443
|
+
statement_params,
|
431
444
|
)
|
432
445
|
|
433
446
|
if "|" in sproc_export_file_name:
|
@@ -437,7 +450,7 @@ class MultinomialNB(BaseTransformer):
|
|
437
450
|
print("\n".join(fields[1:]))
|
438
451
|
|
439
452
|
session.file.get(
|
440
|
-
|
453
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
441
454
|
local_result_file_name,
|
442
455
|
statement_params=statement_params
|
443
456
|
)
|
@@ -483,7 +496,7 @@ class MultinomialNB(BaseTransformer):
|
|
483
496
|
|
484
497
|
# Register vectorized UDF for batch inference
|
485
498
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
486
|
-
safe_id=self.
|
499
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
487
500
|
|
488
501
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
489
502
|
# will try to pickle all of self which fails.
|
@@ -575,7 +588,7 @@ class MultinomialNB(BaseTransformer):
|
|
575
588
|
return transformed_pandas_df.to_dict("records")
|
576
589
|
|
577
590
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
578
|
-
safe_id=self.
|
591
|
+
safe_id=self._get_rand_id()
|
579
592
|
)
|
580
593
|
|
581
594
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -742,11 +755,18 @@ class MultinomialNB(BaseTransformer):
|
|
742
755
|
Transformed dataset.
|
743
756
|
"""
|
744
757
|
if isinstance(dataset, DataFrame):
|
758
|
+
expected_type_inferred = ""
|
759
|
+
# when it is classifier, infer the datatype from label columns
|
760
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
761
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
762
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
763
|
+
)
|
764
|
+
|
745
765
|
output_df = self._batch_inference(
|
746
766
|
dataset=dataset,
|
747
767
|
inference_method="predict",
|
748
768
|
expected_output_cols_list=self.output_cols,
|
749
|
-
expected_output_cols_type=
|
769
|
+
expected_output_cols_type=expected_type_inferred,
|
750
770
|
)
|
751
771
|
elif isinstance(dataset, pd.DataFrame):
|
752
772
|
output_df = self._sklearn_inference(
|
@@ -817,10 +837,10 @@ class MultinomialNB(BaseTransformer):
|
|
817
837
|
|
818
838
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
819
839
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
820
|
-
Returns
|
840
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
821
841
|
"""
|
822
842
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
823
|
-
return []
|
843
|
+
return [output_cols_prefix]
|
824
844
|
|
825
845
|
classes = self._sklearn_object.classes_
|
826
846
|
if isinstance(classes, numpy.ndarray):
|
@@ -1049,7 +1069,7 @@ class MultinomialNB(BaseTransformer):
|
|
1049
1069
|
cp.dump(self._sklearn_object, local_score_file)
|
1050
1070
|
|
1051
1071
|
# Create temp stage to run score.
|
1052
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1072
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1053
1073
|
session = dataset._session
|
1054
1074
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1055
1075
|
SqlResultValidator(
|
@@ -1063,8 +1083,9 @@ class MultinomialNB(BaseTransformer):
|
|
1063
1083
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1064
1084
|
).validate()
|
1065
1085
|
|
1066
|
-
|
1067
|
-
|
1086
|
+
# Use posixpath to construct stage paths
|
1087
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1088
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1068
1089
|
statement_params = telemetry.get_function_usage_statement_params(
|
1069
1090
|
project=_PROJECT,
|
1070
1091
|
subproject=_SUBPROJECT,
|
@@ -1090,6 +1111,7 @@ class MultinomialNB(BaseTransformer):
|
|
1090
1111
|
replace=True,
|
1091
1112
|
session=session,
|
1092
1113
|
statement_params=statement_params,
|
1114
|
+
anonymous=True
|
1093
1115
|
)
|
1094
1116
|
def score_wrapper_sproc(
|
1095
1117
|
session: Session,
|
@@ -1097,7 +1119,8 @@ class MultinomialNB(BaseTransformer):
|
|
1097
1119
|
stage_score_file_name: str,
|
1098
1120
|
input_cols: List[str],
|
1099
1121
|
label_cols: List[str],
|
1100
|
-
sample_weight_col: Optional[str]
|
1122
|
+
sample_weight_col: Optional[str],
|
1123
|
+
statement_params: Dict[str, str]
|
1101
1124
|
) -> float:
|
1102
1125
|
import cloudpickle as cp
|
1103
1126
|
import numpy as np
|
@@ -1147,14 +1170,14 @@ class MultinomialNB(BaseTransformer):
|
|
1147
1170
|
api_calls=[Session.call],
|
1148
1171
|
custom_tags=dict([("autogen", True)]),
|
1149
1172
|
)
|
1150
|
-
score =
|
1151
|
-
|
1173
|
+
score = score_wrapper_sproc(
|
1174
|
+
session,
|
1152
1175
|
query,
|
1153
1176
|
stage_score_file_name,
|
1154
1177
|
identifier.get_unescaped_names(self.input_cols),
|
1155
1178
|
identifier.get_unescaped_names(self.label_cols),
|
1156
1179
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1157
|
-
statement_params
|
1180
|
+
statement_params,
|
1158
1181
|
)
|
1159
1182
|
|
1160
1183
|
cleanup_temp_files([local_score_file_name])
|
@@ -1172,18 +1195,20 @@ class MultinomialNB(BaseTransformer):
|
|
1172
1195
|
if self._sklearn_object._estimator_type == 'classifier':
|
1173
1196
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1174
1197
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1175
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1198
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1199
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1176
1200
|
# For regressor, the type of predict is float64
|
1177
1201
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1178
1202
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1179
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1180
|
-
|
1203
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1204
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1181
1205
|
for prob_func in PROB_FUNCTIONS:
|
1182
1206
|
if hasattr(self, prob_func):
|
1183
1207
|
output_cols_prefix: str = f"{prob_func}_"
|
1184
1208
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1185
1209
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1186
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1210
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1211
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1187
1212
|
|
1188
1213
|
@property
|
1189
1214
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|