snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -256,7 +258,6 @@ class AgglomerativeClustering(BaseTransformer):
|
|
256
258
|
sample_weight_col: Optional[str] = None,
|
257
259
|
) -> None:
|
258
260
|
super().__init__()
|
259
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
260
261
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
261
262
|
|
262
263
|
self._deps = list(deps)
|
@@ -284,6 +285,15 @@ class AgglomerativeClustering(BaseTransformer):
|
|
284
285
|
self.set_drop_input_cols(drop_input_cols)
|
285
286
|
self.set_sample_weight_col(sample_weight_col)
|
286
287
|
|
288
|
+
def _get_rand_id(self) -> str:
|
289
|
+
"""
|
290
|
+
Generate random id to be used in sproc and stage names.
|
291
|
+
|
292
|
+
Returns:
|
293
|
+
Random id string usable in sproc, table, and stage names.
|
294
|
+
"""
|
295
|
+
return str(uuid4()).replace("-", "_").upper()
|
296
|
+
|
287
297
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
288
298
|
"""
|
289
299
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -362,7 +372,7 @@ class AgglomerativeClustering(BaseTransformer):
|
|
362
372
|
cp.dump(self._sklearn_object, local_transform_file)
|
363
373
|
|
364
374
|
# Create temp stage to run fit.
|
365
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
375
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
366
376
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
367
377
|
SqlResultValidator(
|
368
378
|
session=session,
|
@@ -375,11 +385,12 @@ class AgglomerativeClustering(BaseTransformer):
|
|
375
385
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
376
386
|
).validate()
|
377
387
|
|
378
|
-
|
388
|
+
# Use posixpath to construct stage paths
|
389
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
390
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
379
391
|
local_result_file_name = get_temp_file_path()
|
380
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
381
392
|
|
382
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
393
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
383
394
|
statement_params = telemetry.get_function_usage_statement_params(
|
384
395
|
project=_PROJECT,
|
385
396
|
subproject=_SUBPROJECT,
|
@@ -405,6 +416,7 @@ class AgglomerativeClustering(BaseTransformer):
|
|
405
416
|
replace=True,
|
406
417
|
session=session,
|
407
418
|
statement_params=statement_params,
|
419
|
+
anonymous=True
|
408
420
|
)
|
409
421
|
def fit_wrapper_sproc(
|
410
422
|
session: Session,
|
@@ -413,7 +425,8 @@ class AgglomerativeClustering(BaseTransformer):
|
|
413
425
|
stage_result_file_name: str,
|
414
426
|
input_cols: List[str],
|
415
427
|
label_cols: List[str],
|
416
|
-
sample_weight_col: Optional[str]
|
428
|
+
sample_weight_col: Optional[str],
|
429
|
+
statement_params: Dict[str, str]
|
417
430
|
) -> str:
|
418
431
|
import cloudpickle as cp
|
419
432
|
import numpy as np
|
@@ -480,15 +493,15 @@ class AgglomerativeClustering(BaseTransformer):
|
|
480
493
|
api_calls=[Session.call],
|
481
494
|
custom_tags=dict([("autogen", True)]),
|
482
495
|
)
|
483
|
-
sproc_export_file_name =
|
484
|
-
|
496
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
497
|
+
session,
|
485
498
|
query,
|
486
499
|
stage_transform_file_name,
|
487
500
|
stage_result_file_name,
|
488
501
|
identifier.get_unescaped_names(self.input_cols),
|
489
502
|
identifier.get_unescaped_names(self.label_cols),
|
490
503
|
identifier.get_unescaped_names(self.sample_weight_col),
|
491
|
-
statement_params
|
504
|
+
statement_params,
|
492
505
|
)
|
493
506
|
|
494
507
|
if "|" in sproc_export_file_name:
|
@@ -498,7 +511,7 @@ class AgglomerativeClustering(BaseTransformer):
|
|
498
511
|
print("\n".join(fields[1:]))
|
499
512
|
|
500
513
|
session.file.get(
|
501
|
-
|
514
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
502
515
|
local_result_file_name,
|
503
516
|
statement_params=statement_params
|
504
517
|
)
|
@@ -544,7 +557,7 @@ class AgglomerativeClustering(BaseTransformer):
|
|
544
557
|
|
545
558
|
# Register vectorized UDF for batch inference
|
546
559
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
547
|
-
safe_id=self.id, method=inference_method)
|
560
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
548
561
|
|
549
562
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
550
563
|
# will try to pickle all of self which fails.
|
@@ -636,7 +649,7 @@ class AgglomerativeClustering(BaseTransformer):
|
|
636
649
|
return transformed_pandas_df.to_dict("records")
|
637
650
|
|
638
651
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
639
|
-
safe_id=self.id
|
652
|
+
safe_id=self._get_rand_id()
|
640
653
|
)
|
641
654
|
|
642
655
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -801,11 +814,18 @@ class AgglomerativeClustering(BaseTransformer):
|
|
801
814
|
Transformed dataset.
|
802
815
|
"""
|
803
816
|
if isinstance(dataset, DataFrame):
|
817
|
+
expected_type_inferred = ""
|
818
|
+
# when it is classifier, infer the datatype from label columns
|
819
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
820
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
821
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
822
|
+
)
|
823
|
+
|
804
824
|
output_df = self._batch_inference(
|
805
825
|
dataset=dataset,
|
806
826
|
inference_method="predict",
|
807
827
|
expected_output_cols_list=self.output_cols,
|
808
|
-
expected_output_cols_type=
|
828
|
+
expected_output_cols_type=expected_type_inferred,
|
809
829
|
)
|
810
830
|
elif isinstance(dataset, pd.DataFrame):
|
811
831
|
output_df = self._sklearn_inference(
|
@@ -876,10 +896,10 @@ class AgglomerativeClustering(BaseTransformer):
|
|
876
896
|
|
877
897
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
878
898
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
879
|
-
Returns
|
899
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
880
900
|
"""
|
881
901
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
882
|
-
return []
|
902
|
+
return [output_cols_prefix]
|
883
903
|
|
884
904
|
classes = self._sklearn_object.classes_
|
885
905
|
if isinstance(classes, numpy.ndarray):
|
@@ -1104,7 +1124,7 @@ class AgglomerativeClustering(BaseTransformer):
|
|
1104
1124
|
cp.dump(self._sklearn_object, local_score_file)
|
1105
1125
|
|
1106
1126
|
# Create temp stage to run score.
|
1107
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1127
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1108
1128
|
session = dataset._session
|
1109
1129
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1110
1130
|
SqlResultValidator(
|
@@ -1118,8 +1138,9 @@ class AgglomerativeClustering(BaseTransformer):
|
|
1118
1138
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1119
1139
|
).validate()
|
1120
1140
|
|
1121
|
-
|
1122
|
-
|
1141
|
+
# Use posixpath to construct stage paths
|
1142
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1143
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1123
1144
|
statement_params = telemetry.get_function_usage_statement_params(
|
1124
1145
|
project=_PROJECT,
|
1125
1146
|
subproject=_SUBPROJECT,
|
@@ -1145,6 +1166,7 @@ class AgglomerativeClustering(BaseTransformer):
|
|
1145
1166
|
replace=True,
|
1146
1167
|
session=session,
|
1147
1168
|
statement_params=statement_params,
|
1169
|
+
anonymous=True
|
1148
1170
|
)
|
1149
1171
|
def score_wrapper_sproc(
|
1150
1172
|
session: Session,
|
@@ -1152,7 +1174,8 @@ class AgglomerativeClustering(BaseTransformer):
|
|
1152
1174
|
stage_score_file_name: str,
|
1153
1175
|
input_cols: List[str],
|
1154
1176
|
label_cols: List[str],
|
1155
|
-
sample_weight_col: Optional[str]
|
1177
|
+
sample_weight_col: Optional[str],
|
1178
|
+
statement_params: Dict[str, str]
|
1156
1179
|
) -> float:
|
1157
1180
|
import cloudpickle as cp
|
1158
1181
|
import numpy as np
|
@@ -1202,14 +1225,14 @@ class AgglomerativeClustering(BaseTransformer):
|
|
1202
1225
|
api_calls=[Session.call],
|
1203
1226
|
custom_tags=dict([("autogen", True)]),
|
1204
1227
|
)
|
1205
|
-
score =
|
1206
|
-
|
1228
|
+
score = score_wrapper_sproc(
|
1229
|
+
session,
|
1207
1230
|
query,
|
1208
1231
|
stage_score_file_name,
|
1209
1232
|
identifier.get_unescaped_names(self.input_cols),
|
1210
1233
|
identifier.get_unescaped_names(self.label_cols),
|
1211
1234
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1212
|
-
statement_params
|
1235
|
+
statement_params,
|
1213
1236
|
)
|
1214
1237
|
|
1215
1238
|
cleanup_temp_files([local_score_file_name])
|
@@ -1227,18 +1250,20 @@ class AgglomerativeClustering(BaseTransformer):
|
|
1227
1250
|
if self._sklearn_object._estimator_type == 'classifier':
|
1228
1251
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1229
1252
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1230
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1253
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1254
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1231
1255
|
# For regressor, the type of predict is float64
|
1232
1256
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1233
1257
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1234
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1235
|
-
|
1258
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1259
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1236
1260
|
for prob_func in PROB_FUNCTIONS:
|
1237
1261
|
if hasattr(self, prob_func):
|
1238
1262
|
output_cols_prefix: str = f"{prob_func}_"
|
1239
1263
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1240
1264
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1241
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1265
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1266
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1242
1267
|
|
1243
1268
|
@property
|
1244
1269
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -218,7 +220,6 @@ class Birch(BaseTransformer):
|
|
218
220
|
sample_weight_col: Optional[str] = None,
|
219
221
|
) -> None:
|
220
222
|
super().__init__()
|
221
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
222
223
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
223
224
|
|
224
225
|
self._deps = list(deps)
|
@@ -242,6 +243,15 @@ class Birch(BaseTransformer):
|
|
242
243
|
self.set_drop_input_cols(drop_input_cols)
|
243
244
|
self.set_sample_weight_col(sample_weight_col)
|
244
245
|
|
246
|
+
def _get_rand_id(self) -> str:
|
247
|
+
"""
|
248
|
+
Generate random id to be used in sproc and stage names.
|
249
|
+
|
250
|
+
Returns:
|
251
|
+
Random id string usable in sproc, table, and stage names.
|
252
|
+
"""
|
253
|
+
return str(uuid4()).replace("-", "_").upper()
|
254
|
+
|
245
255
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
246
256
|
"""
|
247
257
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -320,7 +330,7 @@ class Birch(BaseTransformer):
|
|
320
330
|
cp.dump(self._sklearn_object, local_transform_file)
|
321
331
|
|
322
332
|
# Create temp stage to run fit.
|
323
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
333
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
324
334
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
325
335
|
SqlResultValidator(
|
326
336
|
session=session,
|
@@ -333,11 +343,12 @@ class Birch(BaseTransformer):
|
|
333
343
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
334
344
|
).validate()
|
335
345
|
|
336
|
-
|
346
|
+
# Use posixpath to construct stage paths
|
347
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
348
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
337
349
|
local_result_file_name = get_temp_file_path()
|
338
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
339
350
|
|
340
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
351
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
341
352
|
statement_params = telemetry.get_function_usage_statement_params(
|
342
353
|
project=_PROJECT,
|
343
354
|
subproject=_SUBPROJECT,
|
@@ -363,6 +374,7 @@ class Birch(BaseTransformer):
|
|
363
374
|
replace=True,
|
364
375
|
session=session,
|
365
376
|
statement_params=statement_params,
|
377
|
+
anonymous=True
|
366
378
|
)
|
367
379
|
def fit_wrapper_sproc(
|
368
380
|
session: Session,
|
@@ -371,7 +383,8 @@ class Birch(BaseTransformer):
|
|
371
383
|
stage_result_file_name: str,
|
372
384
|
input_cols: List[str],
|
373
385
|
label_cols: List[str],
|
374
|
-
sample_weight_col: Optional[str]
|
386
|
+
sample_weight_col: Optional[str],
|
387
|
+
statement_params: Dict[str, str]
|
375
388
|
) -> str:
|
376
389
|
import cloudpickle as cp
|
377
390
|
import numpy as np
|
@@ -438,15 +451,15 @@ class Birch(BaseTransformer):
|
|
438
451
|
api_calls=[Session.call],
|
439
452
|
custom_tags=dict([("autogen", True)]),
|
440
453
|
)
|
441
|
-
sproc_export_file_name =
|
442
|
-
|
454
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
455
|
+
session,
|
443
456
|
query,
|
444
457
|
stage_transform_file_name,
|
445
458
|
stage_result_file_name,
|
446
459
|
identifier.get_unescaped_names(self.input_cols),
|
447
460
|
identifier.get_unescaped_names(self.label_cols),
|
448
461
|
identifier.get_unescaped_names(self.sample_weight_col),
|
449
|
-
statement_params
|
462
|
+
statement_params,
|
450
463
|
)
|
451
464
|
|
452
465
|
if "|" in sproc_export_file_name:
|
@@ -456,7 +469,7 @@ class Birch(BaseTransformer):
|
|
456
469
|
print("\n".join(fields[1:]))
|
457
470
|
|
458
471
|
session.file.get(
|
459
|
-
|
472
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
460
473
|
local_result_file_name,
|
461
474
|
statement_params=statement_params
|
462
475
|
)
|
@@ -502,7 +515,7 @@ class Birch(BaseTransformer):
|
|
502
515
|
|
503
516
|
# Register vectorized UDF for batch inference
|
504
517
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
505
|
-
safe_id=self.id, method=inference_method)
|
518
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
506
519
|
|
507
520
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
508
521
|
# will try to pickle all of self which fails.
|
@@ -594,7 +607,7 @@ class Birch(BaseTransformer):
|
|
594
607
|
return transformed_pandas_df.to_dict("records")
|
595
608
|
|
596
609
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
597
|
-
safe_id=self.id
|
610
|
+
safe_id=self._get_rand_id()
|
598
611
|
)
|
599
612
|
|
600
613
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -761,11 +774,18 @@ class Birch(BaseTransformer):
|
|
761
774
|
Transformed dataset.
|
762
775
|
"""
|
763
776
|
if isinstance(dataset, DataFrame):
|
777
|
+
expected_type_inferred = ""
|
778
|
+
# when it is classifier, infer the datatype from label columns
|
779
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
780
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
781
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
782
|
+
)
|
783
|
+
|
764
784
|
output_df = self._batch_inference(
|
765
785
|
dataset=dataset,
|
766
786
|
inference_method="predict",
|
767
787
|
expected_output_cols_list=self.output_cols,
|
768
|
-
expected_output_cols_type=
|
788
|
+
expected_output_cols_type=expected_type_inferred,
|
769
789
|
)
|
770
790
|
elif isinstance(dataset, pd.DataFrame):
|
771
791
|
output_df = self._sklearn_inference(
|
@@ -838,10 +858,10 @@ class Birch(BaseTransformer):
|
|
838
858
|
|
839
859
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
840
860
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
841
|
-
Returns
|
861
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
842
862
|
"""
|
843
863
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
844
|
-
return []
|
864
|
+
return [output_cols_prefix]
|
845
865
|
|
846
866
|
classes = self._sklearn_object.classes_
|
847
867
|
if isinstance(classes, numpy.ndarray):
|
@@ -1066,7 +1086,7 @@ class Birch(BaseTransformer):
|
|
1066
1086
|
cp.dump(self._sklearn_object, local_score_file)
|
1067
1087
|
|
1068
1088
|
# Create temp stage to run score.
|
1069
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1089
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1070
1090
|
session = dataset._session
|
1071
1091
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1072
1092
|
SqlResultValidator(
|
@@ -1080,8 +1100,9 @@ class Birch(BaseTransformer):
|
|
1080
1100
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1081
1101
|
).validate()
|
1082
1102
|
|
1083
|
-
|
1084
|
-
|
1103
|
+
# Use posixpath to construct stage paths
|
1104
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1105
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1085
1106
|
statement_params = telemetry.get_function_usage_statement_params(
|
1086
1107
|
project=_PROJECT,
|
1087
1108
|
subproject=_SUBPROJECT,
|
@@ -1107,6 +1128,7 @@ class Birch(BaseTransformer):
|
|
1107
1128
|
replace=True,
|
1108
1129
|
session=session,
|
1109
1130
|
statement_params=statement_params,
|
1131
|
+
anonymous=True
|
1110
1132
|
)
|
1111
1133
|
def score_wrapper_sproc(
|
1112
1134
|
session: Session,
|
@@ -1114,7 +1136,8 @@ class Birch(BaseTransformer):
|
|
1114
1136
|
stage_score_file_name: str,
|
1115
1137
|
input_cols: List[str],
|
1116
1138
|
label_cols: List[str],
|
1117
|
-
sample_weight_col: Optional[str]
|
1139
|
+
sample_weight_col: Optional[str],
|
1140
|
+
statement_params: Dict[str, str]
|
1118
1141
|
) -> float:
|
1119
1142
|
import cloudpickle as cp
|
1120
1143
|
import numpy as np
|
@@ -1164,14 +1187,14 @@ class Birch(BaseTransformer):
|
|
1164
1187
|
api_calls=[Session.call],
|
1165
1188
|
custom_tags=dict([("autogen", True)]),
|
1166
1189
|
)
|
1167
|
-
score =
|
1168
|
-
|
1190
|
+
score = score_wrapper_sproc(
|
1191
|
+
session,
|
1169
1192
|
query,
|
1170
1193
|
stage_score_file_name,
|
1171
1194
|
identifier.get_unescaped_names(self.input_cols),
|
1172
1195
|
identifier.get_unescaped_names(self.label_cols),
|
1173
1196
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1174
|
-
statement_params
|
1197
|
+
statement_params,
|
1175
1198
|
)
|
1176
1199
|
|
1177
1200
|
cleanup_temp_files([local_score_file_name])
|
@@ -1189,18 +1212,20 @@ class Birch(BaseTransformer):
|
|
1189
1212
|
if self._sklearn_object._estimator_type == 'classifier':
|
1190
1213
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1191
1214
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1192
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1215
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1216
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1193
1217
|
# For regressor, the type of predict is float64
|
1194
1218
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1195
1219
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1196
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1197
|
-
|
1220
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1221
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1198
1222
|
for prob_func in PROB_FUNCTIONS:
|
1199
1223
|
if hasattr(self, prob_func):
|
1200
1224
|
output_cols_prefix: str = f"{prob_func}_"
|
1201
1225
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1202
1226
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1203
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1227
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1228
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1204
1229
|
|
1205
1230
|
@property
|
1206
1231
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|