snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -236,7 +238,6 @@ class MeanShift(BaseTransformer):
|
|
236
238
|
sample_weight_col: Optional[str] = None,
|
237
239
|
) -> None:
|
238
240
|
super().__init__()
|
239
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
240
241
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
241
242
|
|
242
243
|
self._deps = list(deps)
|
@@ -262,6 +263,15 @@ class MeanShift(BaseTransformer):
|
|
262
263
|
self.set_drop_input_cols(drop_input_cols)
|
263
264
|
self.set_sample_weight_col(sample_weight_col)
|
264
265
|
|
266
|
+
def _get_rand_id(self) -> str:
|
267
|
+
"""
|
268
|
+
Generate random id to be used in sproc and stage names.
|
269
|
+
|
270
|
+
Returns:
|
271
|
+
Random id string usable in sproc, table, and stage names.
|
272
|
+
"""
|
273
|
+
return str(uuid4()).replace("-", "_").upper()
|
274
|
+
|
265
275
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
266
276
|
"""
|
267
277
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -340,7 +350,7 @@ class MeanShift(BaseTransformer):
|
|
340
350
|
cp.dump(self._sklearn_object, local_transform_file)
|
341
351
|
|
342
352
|
# Create temp stage to run fit.
|
343
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
353
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
344
354
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
345
355
|
SqlResultValidator(
|
346
356
|
session=session,
|
@@ -353,11 +363,12 @@ class MeanShift(BaseTransformer):
|
|
353
363
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
354
364
|
).validate()
|
355
365
|
|
356
|
-
|
366
|
+
# Use posixpath to construct stage paths
|
367
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
368
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
357
369
|
local_result_file_name = get_temp_file_path()
|
358
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
359
370
|
|
360
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
371
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
361
372
|
statement_params = telemetry.get_function_usage_statement_params(
|
362
373
|
project=_PROJECT,
|
363
374
|
subproject=_SUBPROJECT,
|
@@ -383,6 +394,7 @@ class MeanShift(BaseTransformer):
|
|
383
394
|
replace=True,
|
384
395
|
session=session,
|
385
396
|
statement_params=statement_params,
|
397
|
+
anonymous=True
|
386
398
|
)
|
387
399
|
def fit_wrapper_sproc(
|
388
400
|
session: Session,
|
@@ -391,7 +403,8 @@ class MeanShift(BaseTransformer):
|
|
391
403
|
stage_result_file_name: str,
|
392
404
|
input_cols: List[str],
|
393
405
|
label_cols: List[str],
|
394
|
-
sample_weight_col: Optional[str]
|
406
|
+
sample_weight_col: Optional[str],
|
407
|
+
statement_params: Dict[str, str]
|
395
408
|
) -> str:
|
396
409
|
import cloudpickle as cp
|
397
410
|
import numpy as np
|
@@ -458,15 +471,15 @@ class MeanShift(BaseTransformer):
|
|
458
471
|
api_calls=[Session.call],
|
459
472
|
custom_tags=dict([("autogen", True)]),
|
460
473
|
)
|
461
|
-
sproc_export_file_name =
|
462
|
-
|
474
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
475
|
+
session,
|
463
476
|
query,
|
464
477
|
stage_transform_file_name,
|
465
478
|
stage_result_file_name,
|
466
479
|
identifier.get_unescaped_names(self.input_cols),
|
467
480
|
identifier.get_unescaped_names(self.label_cols),
|
468
481
|
identifier.get_unescaped_names(self.sample_weight_col),
|
469
|
-
statement_params
|
482
|
+
statement_params,
|
470
483
|
)
|
471
484
|
|
472
485
|
if "|" in sproc_export_file_name:
|
@@ -476,7 +489,7 @@ class MeanShift(BaseTransformer):
|
|
476
489
|
print("\n".join(fields[1:]))
|
477
490
|
|
478
491
|
session.file.get(
|
479
|
-
|
492
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
480
493
|
local_result_file_name,
|
481
494
|
statement_params=statement_params
|
482
495
|
)
|
@@ -522,7 +535,7 @@ class MeanShift(BaseTransformer):
|
|
522
535
|
|
523
536
|
# Register vectorized UDF for batch inference
|
524
537
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
525
|
-
safe_id=self.
|
538
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
526
539
|
|
527
540
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
528
541
|
# will try to pickle all of self which fails.
|
@@ -614,7 +627,7 @@ class MeanShift(BaseTransformer):
|
|
614
627
|
return transformed_pandas_df.to_dict("records")
|
615
628
|
|
616
629
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
617
|
-
safe_id=self.
|
630
|
+
safe_id=self._get_rand_id()
|
618
631
|
)
|
619
632
|
|
620
633
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -781,11 +794,18 @@ class MeanShift(BaseTransformer):
|
|
781
794
|
Transformed dataset.
|
782
795
|
"""
|
783
796
|
if isinstance(dataset, DataFrame):
|
797
|
+
expected_type_inferred = ""
|
798
|
+
# when it is classifier, infer the datatype from label columns
|
799
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
800
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
801
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
802
|
+
)
|
803
|
+
|
784
804
|
output_df = self._batch_inference(
|
785
805
|
dataset=dataset,
|
786
806
|
inference_method="predict",
|
787
807
|
expected_output_cols_list=self.output_cols,
|
788
|
-
expected_output_cols_type=
|
808
|
+
expected_output_cols_type=expected_type_inferred,
|
789
809
|
)
|
790
810
|
elif isinstance(dataset, pd.DataFrame):
|
791
811
|
output_df = self._sklearn_inference(
|
@@ -856,10 +876,10 @@ class MeanShift(BaseTransformer):
|
|
856
876
|
|
857
877
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
858
878
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
859
|
-
Returns
|
879
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
860
880
|
"""
|
861
881
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
862
|
-
return []
|
882
|
+
return [output_cols_prefix]
|
863
883
|
|
864
884
|
classes = self._sklearn_object.classes_
|
865
885
|
if isinstance(classes, numpy.ndarray):
|
@@ -1084,7 +1104,7 @@ class MeanShift(BaseTransformer):
|
|
1084
1104
|
cp.dump(self._sklearn_object, local_score_file)
|
1085
1105
|
|
1086
1106
|
# Create temp stage to run score.
|
1087
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1107
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1088
1108
|
session = dataset._session
|
1089
1109
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1090
1110
|
SqlResultValidator(
|
@@ -1098,8 +1118,9 @@ class MeanShift(BaseTransformer):
|
|
1098
1118
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1099
1119
|
).validate()
|
1100
1120
|
|
1101
|
-
|
1102
|
-
|
1121
|
+
# Use posixpath to construct stage paths
|
1122
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1123
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1103
1124
|
statement_params = telemetry.get_function_usage_statement_params(
|
1104
1125
|
project=_PROJECT,
|
1105
1126
|
subproject=_SUBPROJECT,
|
@@ -1125,6 +1146,7 @@ class MeanShift(BaseTransformer):
|
|
1125
1146
|
replace=True,
|
1126
1147
|
session=session,
|
1127
1148
|
statement_params=statement_params,
|
1149
|
+
anonymous=True
|
1128
1150
|
)
|
1129
1151
|
def score_wrapper_sproc(
|
1130
1152
|
session: Session,
|
@@ -1132,7 +1154,8 @@ class MeanShift(BaseTransformer):
|
|
1132
1154
|
stage_score_file_name: str,
|
1133
1155
|
input_cols: List[str],
|
1134
1156
|
label_cols: List[str],
|
1135
|
-
sample_weight_col: Optional[str]
|
1157
|
+
sample_weight_col: Optional[str],
|
1158
|
+
statement_params: Dict[str, str]
|
1136
1159
|
) -> float:
|
1137
1160
|
import cloudpickle as cp
|
1138
1161
|
import numpy as np
|
@@ -1182,14 +1205,14 @@ class MeanShift(BaseTransformer):
|
|
1182
1205
|
api_calls=[Session.call],
|
1183
1206
|
custom_tags=dict([("autogen", True)]),
|
1184
1207
|
)
|
1185
|
-
score =
|
1186
|
-
|
1208
|
+
score = score_wrapper_sproc(
|
1209
|
+
session,
|
1187
1210
|
query,
|
1188
1211
|
stage_score_file_name,
|
1189
1212
|
identifier.get_unescaped_names(self.input_cols),
|
1190
1213
|
identifier.get_unescaped_names(self.label_cols),
|
1191
1214
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1192
|
-
statement_params
|
1215
|
+
statement_params,
|
1193
1216
|
)
|
1194
1217
|
|
1195
1218
|
cleanup_temp_files([local_score_file_name])
|
@@ -1207,18 +1230,20 @@ class MeanShift(BaseTransformer):
|
|
1207
1230
|
if self._sklearn_object._estimator_type == 'classifier':
|
1208
1231
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1209
1232
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1210
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1233
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1234
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1211
1235
|
# For regressor, the type of predict is float64
|
1212
1236
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1213
1237
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1214
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1215
|
-
|
1238
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1239
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1216
1240
|
for prob_func in PROB_FUNCTIONS:
|
1217
1241
|
if hasattr(self, prob_func):
|
1218
1242
|
output_cols_prefix: str = f"{prob_func}_"
|
1219
1243
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1220
1244
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1221
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1245
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1246
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1222
1247
|
|
1223
1248
|
@property
|
1224
1249
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -281,7 +283,6 @@ class MiniBatchKMeans(BaseTransformer):
|
|
281
283
|
sample_weight_col: Optional[str] = None,
|
282
284
|
) -> None:
|
283
285
|
super().__init__()
|
284
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
285
286
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
286
287
|
|
287
288
|
self._deps = list(deps)
|
@@ -312,6 +313,15 @@ class MiniBatchKMeans(BaseTransformer):
|
|
312
313
|
self.set_drop_input_cols(drop_input_cols)
|
313
314
|
self.set_sample_weight_col(sample_weight_col)
|
314
315
|
|
316
|
+
def _get_rand_id(self) -> str:
|
317
|
+
"""
|
318
|
+
Generate random id to be used in sproc and stage names.
|
319
|
+
|
320
|
+
Returns:
|
321
|
+
Random id string usable in sproc, table, and stage names.
|
322
|
+
"""
|
323
|
+
return str(uuid4()).replace("-", "_").upper()
|
324
|
+
|
315
325
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
316
326
|
"""
|
317
327
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -390,7 +400,7 @@ class MiniBatchKMeans(BaseTransformer):
|
|
390
400
|
cp.dump(self._sklearn_object, local_transform_file)
|
391
401
|
|
392
402
|
# Create temp stage to run fit.
|
393
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
403
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
394
404
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
395
405
|
SqlResultValidator(
|
396
406
|
session=session,
|
@@ -403,11 +413,12 @@ class MiniBatchKMeans(BaseTransformer):
|
|
403
413
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
404
414
|
).validate()
|
405
415
|
|
406
|
-
|
416
|
+
# Use posixpath to construct stage paths
|
417
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
418
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
407
419
|
local_result_file_name = get_temp_file_path()
|
408
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
409
420
|
|
410
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
421
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
411
422
|
statement_params = telemetry.get_function_usage_statement_params(
|
412
423
|
project=_PROJECT,
|
413
424
|
subproject=_SUBPROJECT,
|
@@ -433,6 +444,7 @@ class MiniBatchKMeans(BaseTransformer):
|
|
433
444
|
replace=True,
|
434
445
|
session=session,
|
435
446
|
statement_params=statement_params,
|
447
|
+
anonymous=True
|
436
448
|
)
|
437
449
|
def fit_wrapper_sproc(
|
438
450
|
session: Session,
|
@@ -441,7 +453,8 @@ class MiniBatchKMeans(BaseTransformer):
|
|
441
453
|
stage_result_file_name: str,
|
442
454
|
input_cols: List[str],
|
443
455
|
label_cols: List[str],
|
444
|
-
sample_weight_col: Optional[str]
|
456
|
+
sample_weight_col: Optional[str],
|
457
|
+
statement_params: Dict[str, str]
|
445
458
|
) -> str:
|
446
459
|
import cloudpickle as cp
|
447
460
|
import numpy as np
|
@@ -508,15 +521,15 @@ class MiniBatchKMeans(BaseTransformer):
|
|
508
521
|
api_calls=[Session.call],
|
509
522
|
custom_tags=dict([("autogen", True)]),
|
510
523
|
)
|
511
|
-
sproc_export_file_name =
|
512
|
-
|
524
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
525
|
+
session,
|
513
526
|
query,
|
514
527
|
stage_transform_file_name,
|
515
528
|
stage_result_file_name,
|
516
529
|
identifier.get_unescaped_names(self.input_cols),
|
517
530
|
identifier.get_unescaped_names(self.label_cols),
|
518
531
|
identifier.get_unescaped_names(self.sample_weight_col),
|
519
|
-
statement_params
|
532
|
+
statement_params,
|
520
533
|
)
|
521
534
|
|
522
535
|
if "|" in sproc_export_file_name:
|
@@ -526,7 +539,7 @@ class MiniBatchKMeans(BaseTransformer):
|
|
526
539
|
print("\n".join(fields[1:]))
|
527
540
|
|
528
541
|
session.file.get(
|
529
|
-
|
542
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
530
543
|
local_result_file_name,
|
531
544
|
statement_params=statement_params
|
532
545
|
)
|
@@ -572,7 +585,7 @@ class MiniBatchKMeans(BaseTransformer):
|
|
572
585
|
|
573
586
|
# Register vectorized UDF for batch inference
|
574
587
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
575
|
-
safe_id=self.
|
588
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
576
589
|
|
577
590
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
578
591
|
# will try to pickle all of self which fails.
|
@@ -664,7 +677,7 @@ class MiniBatchKMeans(BaseTransformer):
|
|
664
677
|
return transformed_pandas_df.to_dict("records")
|
665
678
|
|
666
679
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
667
|
-
safe_id=self.
|
680
|
+
safe_id=self._get_rand_id()
|
668
681
|
)
|
669
682
|
|
670
683
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -831,11 +844,18 @@ class MiniBatchKMeans(BaseTransformer):
|
|
831
844
|
Transformed dataset.
|
832
845
|
"""
|
833
846
|
if isinstance(dataset, DataFrame):
|
847
|
+
expected_type_inferred = ""
|
848
|
+
# when it is classifier, infer the datatype from label columns
|
849
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
850
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
851
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
852
|
+
)
|
853
|
+
|
834
854
|
output_df = self._batch_inference(
|
835
855
|
dataset=dataset,
|
836
856
|
inference_method="predict",
|
837
857
|
expected_output_cols_list=self.output_cols,
|
838
|
-
expected_output_cols_type=
|
858
|
+
expected_output_cols_type=expected_type_inferred,
|
839
859
|
)
|
840
860
|
elif isinstance(dataset, pd.DataFrame):
|
841
861
|
output_df = self._sklearn_inference(
|
@@ -908,10 +928,10 @@ class MiniBatchKMeans(BaseTransformer):
|
|
908
928
|
|
909
929
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
910
930
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
911
|
-
Returns
|
931
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
912
932
|
"""
|
913
933
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
914
|
-
return []
|
934
|
+
return [output_cols_prefix]
|
915
935
|
|
916
936
|
classes = self._sklearn_object.classes_
|
917
937
|
if isinstance(classes, numpy.ndarray):
|
@@ -1136,7 +1156,7 @@ class MiniBatchKMeans(BaseTransformer):
|
|
1136
1156
|
cp.dump(self._sklearn_object, local_score_file)
|
1137
1157
|
|
1138
1158
|
# Create temp stage to run score.
|
1139
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1159
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1140
1160
|
session = dataset._session
|
1141
1161
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1142
1162
|
SqlResultValidator(
|
@@ -1150,8 +1170,9 @@ class MiniBatchKMeans(BaseTransformer):
|
|
1150
1170
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1151
1171
|
).validate()
|
1152
1172
|
|
1153
|
-
|
1154
|
-
|
1173
|
+
# Use posixpath to construct stage paths
|
1174
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1175
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1155
1176
|
statement_params = telemetry.get_function_usage_statement_params(
|
1156
1177
|
project=_PROJECT,
|
1157
1178
|
subproject=_SUBPROJECT,
|
@@ -1177,6 +1198,7 @@ class MiniBatchKMeans(BaseTransformer):
|
|
1177
1198
|
replace=True,
|
1178
1199
|
session=session,
|
1179
1200
|
statement_params=statement_params,
|
1201
|
+
anonymous=True
|
1180
1202
|
)
|
1181
1203
|
def score_wrapper_sproc(
|
1182
1204
|
session: Session,
|
@@ -1184,7 +1206,8 @@ class MiniBatchKMeans(BaseTransformer):
|
|
1184
1206
|
stage_score_file_name: str,
|
1185
1207
|
input_cols: List[str],
|
1186
1208
|
label_cols: List[str],
|
1187
|
-
sample_weight_col: Optional[str]
|
1209
|
+
sample_weight_col: Optional[str],
|
1210
|
+
statement_params: Dict[str, str]
|
1188
1211
|
) -> float:
|
1189
1212
|
import cloudpickle as cp
|
1190
1213
|
import numpy as np
|
@@ -1234,14 +1257,14 @@ class MiniBatchKMeans(BaseTransformer):
|
|
1234
1257
|
api_calls=[Session.call],
|
1235
1258
|
custom_tags=dict([("autogen", True)]),
|
1236
1259
|
)
|
1237
|
-
score =
|
1238
|
-
|
1260
|
+
score = score_wrapper_sproc(
|
1261
|
+
session,
|
1239
1262
|
query,
|
1240
1263
|
stage_score_file_name,
|
1241
1264
|
identifier.get_unescaped_names(self.input_cols),
|
1242
1265
|
identifier.get_unescaped_names(self.label_cols),
|
1243
1266
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1244
|
-
statement_params
|
1267
|
+
statement_params,
|
1245
1268
|
)
|
1246
1269
|
|
1247
1270
|
cleanup_temp_files([local_score_file_name])
|
@@ -1259,18 +1282,20 @@ class MiniBatchKMeans(BaseTransformer):
|
|
1259
1282
|
if self._sklearn_object._estimator_type == 'classifier':
|
1260
1283
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1261
1284
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1262
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1285
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1286
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1263
1287
|
# For regressor, the type of predict is float64
|
1264
1288
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1265
1289
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1266
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1267
|
-
|
1290
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1291
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1268
1292
|
for prob_func in PROB_FUNCTIONS:
|
1269
1293
|
if hasattr(self, prob_func):
|
1270
1294
|
output_cols_prefix: str = f"{prob_func}_"
|
1271
1295
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1272
1296
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1273
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1297
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1298
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1274
1299
|
|
1275
1300
|
@property
|
1276
1301
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|