snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
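
Most of the churn in this release is in the autogenerated snowflake/ml/modeling wrappers, nearly all of which show the same +51/-26 line counts; two of those wrapper diffs are reproduced below. The release also bumps snowflake/ml/version.py by one line. A minimal sketch, not part of the diff, for checking which release is installed locally (the VERSION attribute name is an assumption based on that one-line change):

# Minimal sketch: confirm the locally installed release of snowflake-ml-python.
# Assumption: snowflake/ml/version.py exposes the version string as VERSION.
from snowflake.ml import version

print(version.VERSION)  # expected to print "1.0.2" before the upgrade, "1.0.3" after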

snowflake/ml/modeling/neighbors/nearest_centroid.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -199,7 +201,6 @@ class NearestCentroid(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -220,6 +221,15 @@ class NearestCentroid(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -298,7 +308,7 @@ class NearestCentroid(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -311,11 +321,12 @@ class NearestCentroid(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -341,6 +352,7 @@ class NearestCentroid(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -349,7 +361,8 @@ class NearestCentroid(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -416,15 +429,15 @@ class NearestCentroid(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -434,7 +447,7 @@ class NearestCentroid(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -480,7 +493,7 @@ class NearestCentroid(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -572,7 +585,7 @@ class NearestCentroid(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -739,11 +752,18 @@ class NearestCentroid(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -814,10 +834,10 @@ class NearestCentroid(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1042,7 +1062,7 @@ class NearestCentroid(BaseTransformer):
            cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1056,8 +1076,9 @@ class NearestCentroid(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1083,6 +1104,7 @@ class NearestCentroid(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1090,7 +1112,8 @@ class NearestCentroid(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1140,14 +1163,14 @@ class NearestCentroid(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1165,18 +1188,20 @@ class NearestCentroid(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:

snowflake/ml/modeling/neighbors/nearest_neighbors.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -245,7 +247,6 @@ class NearestNeighbors(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -272,6 +273,15 @@ class NearestNeighbors(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -350,7 +360,7 @@ class NearestNeighbors(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -363,11 +373,12 @@ class NearestNeighbors(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -393,6 +404,7 @@ class NearestNeighbors(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -401,7 +413,8 @@ class NearestNeighbors(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -468,15 +481,15 @@ class NearestNeighbors(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -486,7 +499,7 @@ class NearestNeighbors(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
        )
@@ -532,7 +545,7 @@ class NearestNeighbors(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -624,7 +637,7 @@ class NearestNeighbors(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -789,11 +802,18 @@ class NearestNeighbors(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -864,10 +884,10 @@ class NearestNeighbors(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1092,7 +1112,7 @@ class NearestNeighbors(BaseTransformer):
            cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1106,8 +1126,9 @@ class NearestNeighbors(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1133,6 +1154,7 @@ class NearestNeighbors(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1140,7 +1162,8 @@ class NearestNeighbors(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1190,14 +1213,14 @@ class NearestNeighbors(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1215,18 +1238,20 @@ class NearestNeighbors(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
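
The other notable addition in predict() is that, for Snowpark DataFrame input, expected_output_cols_type is now derived from the model signature when one is available (falling back to an empty string), using Snowpark's internal convert_sp_to_sf_type helper to map the signature's Snowpark output type to a Snowflake SQL type name. A rough sketch of that mapping, assuming the internal helper keeps the location and call pattern shown in the diff:

# Rough sketch mirroring the call pattern added to predict(). Assumption: the
# internal helper snowflake.snowpark._internal.type_utils.convert_sp_to_sf_type
# stays importable and accepts a Snowpark DataType instance, as in the diff.
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
from snowflake.snowpark.types import DoubleType, LongType, StringType

for sp_type in (DoubleType(), LongType(), StringType()):
    # Prints the Snowflake SQL type name that would be passed as
    # expected_output_cols_type for an output column of this Snowpark type.
    print(type(sp_type).__name__, "->", convert_sp_to_sf_type(sp_type))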