snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -229,7 +231,6 @@ class Nystroem(BaseTransformer):
|
|
229
231
|
sample_weight_col: Optional[str] = None,
|
230
232
|
) -> None:
|
231
233
|
super().__init__()
|
232
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
233
234
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
234
235
|
|
235
236
|
self._deps = list(deps)
|
@@ -256,6 +257,15 @@ class Nystroem(BaseTransformer):
|
|
256
257
|
self.set_drop_input_cols(drop_input_cols)
|
257
258
|
self.set_sample_weight_col(sample_weight_col)
|
258
259
|
|
260
|
+
def _get_rand_id(self) -> str:
|
261
|
+
"""
|
262
|
+
Generate random id to be used in sproc and stage names.
|
263
|
+
|
264
|
+
Returns:
|
265
|
+
Random id string usable in sproc, table, and stage names.
|
266
|
+
"""
|
267
|
+
return str(uuid4()).replace("-", "_").upper()
|
268
|
+
|
259
269
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
260
270
|
"""
|
261
271
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -334,7 +344,7 @@ class Nystroem(BaseTransformer):
|
|
334
344
|
cp.dump(self._sklearn_object, local_transform_file)
|
335
345
|
|
336
346
|
# Create temp stage to run fit.
|
337
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
347
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
338
348
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
339
349
|
SqlResultValidator(
|
340
350
|
session=session,
|
@@ -347,11 +357,12 @@ class Nystroem(BaseTransformer):
|
|
347
357
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
348
358
|
).validate()
|
349
359
|
|
350
|
-
|
360
|
+
# Use posixpath to construct stage paths
|
361
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
362
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
351
363
|
local_result_file_name = get_temp_file_path()
|
352
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
353
364
|
|
354
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
365
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
355
366
|
statement_params = telemetry.get_function_usage_statement_params(
|
356
367
|
project=_PROJECT,
|
357
368
|
subproject=_SUBPROJECT,
|
@@ -377,6 +388,7 @@ class Nystroem(BaseTransformer):
|
|
377
388
|
replace=True,
|
378
389
|
session=session,
|
379
390
|
statement_params=statement_params,
|
391
|
+
anonymous=True
|
380
392
|
)
|
381
393
|
def fit_wrapper_sproc(
|
382
394
|
session: Session,
|
@@ -385,7 +397,8 @@ class Nystroem(BaseTransformer):
|
|
385
397
|
stage_result_file_name: str,
|
386
398
|
input_cols: List[str],
|
387
399
|
label_cols: List[str],
|
388
|
-
sample_weight_col: Optional[str]
|
400
|
+
sample_weight_col: Optional[str],
|
401
|
+
statement_params: Dict[str, str]
|
389
402
|
) -> str:
|
390
403
|
import cloudpickle as cp
|
391
404
|
import numpy as np
|
@@ -452,15 +465,15 @@ class Nystroem(BaseTransformer):
|
|
452
465
|
api_calls=[Session.call],
|
453
466
|
custom_tags=dict([("autogen", True)]),
|
454
467
|
)
|
455
|
-
sproc_export_file_name =
|
456
|
-
|
468
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
469
|
+
session,
|
457
470
|
query,
|
458
471
|
stage_transform_file_name,
|
459
472
|
stage_result_file_name,
|
460
473
|
identifier.get_unescaped_names(self.input_cols),
|
461
474
|
identifier.get_unescaped_names(self.label_cols),
|
462
475
|
identifier.get_unescaped_names(self.sample_weight_col),
|
463
|
-
statement_params
|
476
|
+
statement_params,
|
464
477
|
)
|
465
478
|
|
466
479
|
if "|" in sproc_export_file_name:
|
@@ -470,7 +483,7 @@ class Nystroem(BaseTransformer):
|
|
470
483
|
print("\n".join(fields[1:]))
|
471
484
|
|
472
485
|
session.file.get(
|
473
|
-
|
486
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
474
487
|
local_result_file_name,
|
475
488
|
statement_params=statement_params
|
476
489
|
)
|
@@ -516,7 +529,7 @@ class Nystroem(BaseTransformer):
|
|
516
529
|
|
517
530
|
# Register vectorized UDF for batch inference
|
518
531
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
519
|
-
safe_id=self.
|
532
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
520
533
|
|
521
534
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
522
535
|
# will try to pickle all of self which fails.
|
@@ -608,7 +621,7 @@ class Nystroem(BaseTransformer):
|
|
608
621
|
return transformed_pandas_df.to_dict("records")
|
609
622
|
|
610
623
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
611
|
-
safe_id=self.
|
624
|
+
safe_id=self._get_rand_id()
|
612
625
|
)
|
613
626
|
|
614
627
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -773,11 +786,18 @@ class Nystroem(BaseTransformer):
|
|
773
786
|
Transformed dataset.
|
774
787
|
"""
|
775
788
|
if isinstance(dataset, DataFrame):
|
789
|
+
expected_type_inferred = ""
|
790
|
+
# when it is classifier, infer the datatype from label columns
|
791
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
792
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
793
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
794
|
+
)
|
795
|
+
|
776
796
|
output_df = self._batch_inference(
|
777
797
|
dataset=dataset,
|
778
798
|
inference_method="predict",
|
779
799
|
expected_output_cols_list=self.output_cols,
|
780
|
-
expected_output_cols_type=
|
800
|
+
expected_output_cols_type=expected_type_inferred,
|
781
801
|
)
|
782
802
|
elif isinstance(dataset, pd.DataFrame):
|
783
803
|
output_df = self._sklearn_inference(
|
@@ -850,10 +870,10 @@ class Nystroem(BaseTransformer):
|
|
850
870
|
|
851
871
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
852
872
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
853
|
-
Returns
|
873
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
854
874
|
"""
|
855
875
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
856
|
-
return []
|
876
|
+
return [output_cols_prefix]
|
857
877
|
|
858
878
|
classes = self._sklearn_object.classes_
|
859
879
|
if isinstance(classes, numpy.ndarray):
|
@@ -1078,7 +1098,7 @@ class Nystroem(BaseTransformer):
|
|
1078
1098
|
cp.dump(self._sklearn_object, local_score_file)
|
1079
1099
|
|
1080
1100
|
# Create temp stage to run score.
|
1081
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1101
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1082
1102
|
session = dataset._session
|
1083
1103
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1084
1104
|
SqlResultValidator(
|
@@ -1092,8 +1112,9 @@ class Nystroem(BaseTransformer):
|
|
1092
1112
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1093
1113
|
).validate()
|
1094
1114
|
|
1095
|
-
|
1096
|
-
|
1115
|
+
# Use posixpath to construct stage paths
|
1116
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1117
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1097
1118
|
statement_params = telemetry.get_function_usage_statement_params(
|
1098
1119
|
project=_PROJECT,
|
1099
1120
|
subproject=_SUBPROJECT,
|
@@ -1119,6 +1140,7 @@ class Nystroem(BaseTransformer):
|
|
1119
1140
|
replace=True,
|
1120
1141
|
session=session,
|
1121
1142
|
statement_params=statement_params,
|
1143
|
+
anonymous=True
|
1122
1144
|
)
|
1123
1145
|
def score_wrapper_sproc(
|
1124
1146
|
session: Session,
|
@@ -1126,7 +1148,8 @@ class Nystroem(BaseTransformer):
|
|
1126
1148
|
stage_score_file_name: str,
|
1127
1149
|
input_cols: List[str],
|
1128
1150
|
label_cols: List[str],
|
1129
|
-
sample_weight_col: Optional[str]
|
1151
|
+
sample_weight_col: Optional[str],
|
1152
|
+
statement_params: Dict[str, str]
|
1130
1153
|
) -> float:
|
1131
1154
|
import cloudpickle as cp
|
1132
1155
|
import numpy as np
|
@@ -1176,14 +1199,14 @@ class Nystroem(BaseTransformer):
|
|
1176
1199
|
api_calls=[Session.call],
|
1177
1200
|
custom_tags=dict([("autogen", True)]),
|
1178
1201
|
)
|
1179
|
-
score =
|
1180
|
-
|
1202
|
+
score = score_wrapper_sproc(
|
1203
|
+
session,
|
1181
1204
|
query,
|
1182
1205
|
stage_score_file_name,
|
1183
1206
|
identifier.get_unescaped_names(self.input_cols),
|
1184
1207
|
identifier.get_unescaped_names(self.label_cols),
|
1185
1208
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1186
|
-
statement_params
|
1209
|
+
statement_params,
|
1187
1210
|
)
|
1188
1211
|
|
1189
1212
|
cleanup_temp_files([local_score_file_name])
|
@@ -1201,18 +1224,20 @@ class Nystroem(BaseTransformer):
|
|
1201
1224
|
if self._sklearn_object._estimator_type == 'classifier':
|
1202
1225
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1203
1226
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1204
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1227
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1228
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1205
1229
|
# For regressor, the type of predict is float64
|
1206
1230
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1207
1231
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1208
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1209
|
-
|
1232
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1233
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1210
1234
|
for prob_func in PROB_FUNCTIONS:
|
1211
1235
|
if hasattr(self, prob_func):
|
1212
1236
|
output_cols_prefix: str = f"{prob_func}_"
|
1213
1237
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1214
1238
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1215
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1239
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1240
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1216
1241
|
|
1217
1242
|
@property
|
1218
1243
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -208,7 +210,6 @@ class PolynomialCountSketch(BaseTransformer):
|
|
208
210
|
sample_weight_col: Optional[str] = None,
|
209
211
|
) -> None:
|
210
212
|
super().__init__()
|
211
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
212
213
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
213
214
|
|
214
215
|
self._deps = list(deps)
|
@@ -232,6 +233,15 @@ class PolynomialCountSketch(BaseTransformer):
|
|
232
233
|
self.set_drop_input_cols(drop_input_cols)
|
233
234
|
self.set_sample_weight_col(sample_weight_col)
|
234
235
|
|
236
|
+
def _get_rand_id(self) -> str:
|
237
|
+
"""
|
238
|
+
Generate random id to be used in sproc and stage names.
|
239
|
+
|
240
|
+
Returns:
|
241
|
+
Random id string usable in sproc, table, and stage names.
|
242
|
+
"""
|
243
|
+
return str(uuid4()).replace("-", "_").upper()
|
244
|
+
|
235
245
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
236
246
|
"""
|
237
247
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -310,7 +320,7 @@ class PolynomialCountSketch(BaseTransformer):
|
|
310
320
|
cp.dump(self._sklearn_object, local_transform_file)
|
311
321
|
|
312
322
|
# Create temp stage to run fit.
|
313
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
323
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
314
324
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
315
325
|
SqlResultValidator(
|
316
326
|
session=session,
|
@@ -323,11 +333,12 @@ class PolynomialCountSketch(BaseTransformer):
|
|
323
333
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
324
334
|
).validate()
|
325
335
|
|
326
|
-
|
336
|
+
# Use posixpath to construct stage paths
|
337
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
338
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
327
339
|
local_result_file_name = get_temp_file_path()
|
328
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
329
340
|
|
330
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
341
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
331
342
|
statement_params = telemetry.get_function_usage_statement_params(
|
332
343
|
project=_PROJECT,
|
333
344
|
subproject=_SUBPROJECT,
|
@@ -353,6 +364,7 @@ class PolynomialCountSketch(BaseTransformer):
|
|
353
364
|
replace=True,
|
354
365
|
session=session,
|
355
366
|
statement_params=statement_params,
|
367
|
+
anonymous=True
|
356
368
|
)
|
357
369
|
def fit_wrapper_sproc(
|
358
370
|
session: Session,
|
@@ -361,7 +373,8 @@ class PolynomialCountSketch(BaseTransformer):
|
|
361
373
|
stage_result_file_name: str,
|
362
374
|
input_cols: List[str],
|
363
375
|
label_cols: List[str],
|
364
|
-
sample_weight_col: Optional[str]
|
376
|
+
sample_weight_col: Optional[str],
|
377
|
+
statement_params: Dict[str, str]
|
365
378
|
) -> str:
|
366
379
|
import cloudpickle as cp
|
367
380
|
import numpy as np
|
@@ -428,15 +441,15 @@ class PolynomialCountSketch(BaseTransformer):
|
|
428
441
|
api_calls=[Session.call],
|
429
442
|
custom_tags=dict([("autogen", True)]),
|
430
443
|
)
|
431
|
-
sproc_export_file_name =
|
432
|
-
|
444
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
445
|
+
session,
|
433
446
|
query,
|
434
447
|
stage_transform_file_name,
|
435
448
|
stage_result_file_name,
|
436
449
|
identifier.get_unescaped_names(self.input_cols),
|
437
450
|
identifier.get_unescaped_names(self.label_cols),
|
438
451
|
identifier.get_unescaped_names(self.sample_weight_col),
|
439
|
-
statement_params
|
452
|
+
statement_params,
|
440
453
|
)
|
441
454
|
|
442
455
|
if "|" in sproc_export_file_name:
|
@@ -446,7 +459,7 @@ class PolynomialCountSketch(BaseTransformer):
|
|
446
459
|
print("\n".join(fields[1:]))
|
447
460
|
|
448
461
|
session.file.get(
|
449
|
-
|
462
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
450
463
|
local_result_file_name,
|
451
464
|
statement_params=statement_params
|
452
465
|
)
|
@@ -492,7 +505,7 @@ class PolynomialCountSketch(BaseTransformer):
|
|
492
505
|
|
493
506
|
# Register vectorized UDF for batch inference
|
494
507
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
495
|
-
safe_id=self.
|
508
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
496
509
|
|
497
510
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
498
511
|
# will try to pickle all of self which fails.
|
@@ -584,7 +597,7 @@ class PolynomialCountSketch(BaseTransformer):
|
|
584
597
|
return transformed_pandas_df.to_dict("records")
|
585
598
|
|
586
599
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
587
|
-
safe_id=self.
|
600
|
+
safe_id=self._get_rand_id()
|
588
601
|
)
|
589
602
|
|
590
603
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -749,11 +762,18 @@ class PolynomialCountSketch(BaseTransformer):
|
|
749
762
|
Transformed dataset.
|
750
763
|
"""
|
751
764
|
if isinstance(dataset, DataFrame):
|
765
|
+
expected_type_inferred = ""
|
766
|
+
# when it is classifier, infer the datatype from label columns
|
767
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
768
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
769
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
770
|
+
)
|
771
|
+
|
752
772
|
output_df = self._batch_inference(
|
753
773
|
dataset=dataset,
|
754
774
|
inference_method="predict",
|
755
775
|
expected_output_cols_list=self.output_cols,
|
756
|
-
expected_output_cols_type=
|
776
|
+
expected_output_cols_type=expected_type_inferred,
|
757
777
|
)
|
758
778
|
elif isinstance(dataset, pd.DataFrame):
|
759
779
|
output_df = self._sklearn_inference(
|
@@ -826,10 +846,10 @@ class PolynomialCountSketch(BaseTransformer):
|
|
826
846
|
|
827
847
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
828
848
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
829
|
-
Returns
|
849
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
830
850
|
"""
|
831
851
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
832
|
-
return []
|
852
|
+
return [output_cols_prefix]
|
833
853
|
|
834
854
|
classes = self._sklearn_object.classes_
|
835
855
|
if isinstance(classes, numpy.ndarray):
|
@@ -1054,7 +1074,7 @@ class PolynomialCountSketch(BaseTransformer):
|
|
1054
1074
|
cp.dump(self._sklearn_object, local_score_file)
|
1055
1075
|
|
1056
1076
|
# Create temp stage to run score.
|
1057
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1077
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1058
1078
|
session = dataset._session
|
1059
1079
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1060
1080
|
SqlResultValidator(
|
@@ -1068,8 +1088,9 @@ class PolynomialCountSketch(BaseTransformer):
|
|
1068
1088
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1069
1089
|
).validate()
|
1070
1090
|
|
1071
|
-
|
1072
|
-
|
1091
|
+
# Use posixpath to construct stage paths
|
1092
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1093
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1073
1094
|
statement_params = telemetry.get_function_usage_statement_params(
|
1074
1095
|
project=_PROJECT,
|
1075
1096
|
subproject=_SUBPROJECT,
|
@@ -1095,6 +1116,7 @@ class PolynomialCountSketch(BaseTransformer):
|
|
1095
1116
|
replace=True,
|
1096
1117
|
session=session,
|
1097
1118
|
statement_params=statement_params,
|
1119
|
+
anonymous=True
|
1098
1120
|
)
|
1099
1121
|
def score_wrapper_sproc(
|
1100
1122
|
session: Session,
|
@@ -1102,7 +1124,8 @@ class PolynomialCountSketch(BaseTransformer):
|
|
1102
1124
|
stage_score_file_name: str,
|
1103
1125
|
input_cols: List[str],
|
1104
1126
|
label_cols: List[str],
|
1105
|
-
sample_weight_col: Optional[str]
|
1127
|
+
sample_weight_col: Optional[str],
|
1128
|
+
statement_params: Dict[str, str]
|
1106
1129
|
) -> float:
|
1107
1130
|
import cloudpickle as cp
|
1108
1131
|
import numpy as np
|
@@ -1152,14 +1175,14 @@ class PolynomialCountSketch(BaseTransformer):
|
|
1152
1175
|
api_calls=[Session.call],
|
1153
1176
|
custom_tags=dict([("autogen", True)]),
|
1154
1177
|
)
|
1155
|
-
score =
|
1156
|
-
|
1178
|
+
score = score_wrapper_sproc(
|
1179
|
+
session,
|
1157
1180
|
query,
|
1158
1181
|
stage_score_file_name,
|
1159
1182
|
identifier.get_unescaped_names(self.input_cols),
|
1160
1183
|
identifier.get_unescaped_names(self.label_cols),
|
1161
1184
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1162
|
-
statement_params
|
1185
|
+
statement_params,
|
1163
1186
|
)
|
1164
1187
|
|
1165
1188
|
cleanup_temp_files([local_score_file_name])
|
@@ -1177,18 +1200,20 @@ class PolynomialCountSketch(BaseTransformer):
|
|
1177
1200
|
if self._sklearn_object._estimator_type == 'classifier':
|
1178
1201
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1179
1202
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1180
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1203
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1204
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1181
1205
|
# For regressor, the type of predict is float64
|
1182
1206
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1183
1207
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1184
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1185
|
-
|
1208
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1209
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1186
1210
|
for prob_func in PROB_FUNCTIONS:
|
1187
1211
|
if hasattr(self, prob_func):
|
1188
1212
|
output_cols_prefix: str = f"{prob_func}_"
|
1189
1213
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1190
1214
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1191
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1215
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1216
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1192
1217
|
|
1193
1218
|
@property
|
1194
1219
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|