snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -262,7 +264,6 @@ class FeatureAgglomeration(BaseTransformer):
|
|
262
264
|
sample_weight_col: Optional[str] = None,
|
263
265
|
) -> None:
|
264
266
|
super().__init__()
|
265
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
266
267
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
267
268
|
|
268
269
|
self._deps = list(deps)
|
@@ -291,6 +292,15 @@ class FeatureAgglomeration(BaseTransformer):
|
|
291
292
|
self.set_drop_input_cols(drop_input_cols)
|
292
293
|
self.set_sample_weight_col(sample_weight_col)
|
293
294
|
|
295
|
+
def _get_rand_id(self) -> str:
|
296
|
+
"""
|
297
|
+
Generate random id to be used in sproc and stage names.
|
298
|
+
|
299
|
+
Returns:
|
300
|
+
Random id string usable in sproc, table, and stage names.
|
301
|
+
"""
|
302
|
+
return str(uuid4()).replace("-", "_").upper()
|
303
|
+
|
294
304
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
295
305
|
"""
|
296
306
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -369,7 +379,7 @@ class FeatureAgglomeration(BaseTransformer):
|
|
369
379
|
cp.dump(self._sklearn_object, local_transform_file)
|
370
380
|
|
371
381
|
# Create temp stage to run fit.
|
372
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
382
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
373
383
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
374
384
|
SqlResultValidator(
|
375
385
|
session=session,
|
@@ -382,11 +392,12 @@ class FeatureAgglomeration(BaseTransformer):
|
|
382
392
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
383
393
|
).validate()
|
384
394
|
|
385
|
-
|
395
|
+
# Use posixpath to construct stage paths
|
396
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
397
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
386
398
|
local_result_file_name = get_temp_file_path()
|
387
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
388
399
|
|
389
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
400
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
390
401
|
statement_params = telemetry.get_function_usage_statement_params(
|
391
402
|
project=_PROJECT,
|
392
403
|
subproject=_SUBPROJECT,
|
@@ -412,6 +423,7 @@ class FeatureAgglomeration(BaseTransformer):
|
|
412
423
|
replace=True,
|
413
424
|
session=session,
|
414
425
|
statement_params=statement_params,
|
426
|
+
anonymous=True
|
415
427
|
)
|
416
428
|
def fit_wrapper_sproc(
|
417
429
|
session: Session,
|
@@ -420,7 +432,8 @@ class FeatureAgglomeration(BaseTransformer):
|
|
420
432
|
stage_result_file_name: str,
|
421
433
|
input_cols: List[str],
|
422
434
|
label_cols: List[str],
|
423
|
-
sample_weight_col: Optional[str]
|
435
|
+
sample_weight_col: Optional[str],
|
436
|
+
statement_params: Dict[str, str]
|
424
437
|
) -> str:
|
425
438
|
import cloudpickle as cp
|
426
439
|
import numpy as np
|
@@ -487,15 +500,15 @@ class FeatureAgglomeration(BaseTransformer):
|
|
487
500
|
api_calls=[Session.call],
|
488
501
|
custom_tags=dict([("autogen", True)]),
|
489
502
|
)
|
490
|
-
sproc_export_file_name =
|
491
|
-
|
503
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
504
|
+
session,
|
492
505
|
query,
|
493
506
|
stage_transform_file_name,
|
494
507
|
stage_result_file_name,
|
495
508
|
identifier.get_unescaped_names(self.input_cols),
|
496
509
|
identifier.get_unescaped_names(self.label_cols),
|
497
510
|
identifier.get_unescaped_names(self.sample_weight_col),
|
498
|
-
statement_params
|
511
|
+
statement_params,
|
499
512
|
)
|
500
513
|
|
501
514
|
if "|" in sproc_export_file_name:
|
@@ -505,7 +518,7 @@ class FeatureAgglomeration(BaseTransformer):
|
|
505
518
|
print("\n".join(fields[1:]))
|
506
519
|
|
507
520
|
session.file.get(
|
508
|
-
|
521
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
509
522
|
local_result_file_name,
|
510
523
|
statement_params=statement_params
|
511
524
|
)
|
@@ -551,7 +564,7 @@ class FeatureAgglomeration(BaseTransformer):
|
|
551
564
|
|
552
565
|
# Register vectorized UDF for batch inference
|
553
566
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
554
|
-
safe_id=self.id, method=inference_method)
|
567
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
555
568
|
|
556
569
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
557
570
|
# will try to pickle all of self which fails.
|
@@ -643,7 +656,7 @@ class FeatureAgglomeration(BaseTransformer):
|
|
643
656
|
return transformed_pandas_df.to_dict("records")
|
644
657
|
|
645
658
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
646
|
-
safe_id=self.id
|
659
|
+
safe_id=self._get_rand_id()
|
647
660
|
)
|
648
661
|
|
649
662
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -808,11 +821,18 @@ class FeatureAgglomeration(BaseTransformer):
|
|
808
821
|
Transformed dataset.
|
809
822
|
"""
|
810
823
|
if isinstance(dataset, DataFrame):
|
824
|
+
expected_type_inferred = ""
|
825
|
+
# when it is classifier, infer the datatype from label columns
|
826
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
827
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
828
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
829
|
+
)
|
830
|
+
|
811
831
|
output_df = self._batch_inference(
|
812
832
|
dataset=dataset,
|
813
833
|
inference_method="predict",
|
814
834
|
expected_output_cols_list=self.output_cols,
|
815
|
-
expected_output_cols_type=
|
835
|
+
expected_output_cols_type=expected_type_inferred,
|
816
836
|
)
|
817
837
|
elif isinstance(dataset, pd.DataFrame):
|
818
838
|
output_df = self._sklearn_inference(
|
@@ -885,10 +905,10 @@ class FeatureAgglomeration(BaseTransformer):
|
|
885
905
|
|
886
906
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
887
907
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
888
|
-
Returns
|
908
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
889
909
|
"""
|
890
910
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
891
|
-
return []
|
911
|
+
return [output_cols_prefix]
|
892
912
|
|
893
913
|
classes = self._sklearn_object.classes_
|
894
914
|
if isinstance(classes, numpy.ndarray):
|
@@ -1113,7 +1133,7 @@ class FeatureAgglomeration(BaseTransformer):
|
|
1113
1133
|
cp.dump(self._sklearn_object, local_score_file)
|
1114
1134
|
|
1115
1135
|
# Create temp stage to run score.
|
1116
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1136
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1117
1137
|
session = dataset._session
|
1118
1138
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1119
1139
|
SqlResultValidator(
|
@@ -1127,8 +1147,9 @@ class FeatureAgglomeration(BaseTransformer):
|
|
1127
1147
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1128
1148
|
).validate()
|
1129
1149
|
|
1130
|
-
|
1131
|
-
|
1150
|
+
# Use posixpath to construct stage paths
|
1151
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1152
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1132
1153
|
statement_params = telemetry.get_function_usage_statement_params(
|
1133
1154
|
project=_PROJECT,
|
1134
1155
|
subproject=_SUBPROJECT,
|
@@ -1154,6 +1175,7 @@ class FeatureAgglomeration(BaseTransformer):
|
|
1154
1175
|
replace=True,
|
1155
1176
|
session=session,
|
1156
1177
|
statement_params=statement_params,
|
1178
|
+
anonymous=True
|
1157
1179
|
)
|
1158
1180
|
def score_wrapper_sproc(
|
1159
1181
|
session: Session,
|
@@ -1161,7 +1183,8 @@ class FeatureAgglomeration(BaseTransformer):
|
|
1161
1183
|
stage_score_file_name: str,
|
1162
1184
|
input_cols: List[str],
|
1163
1185
|
label_cols: List[str],
|
1164
|
-
sample_weight_col: Optional[str]
|
1186
|
+
sample_weight_col: Optional[str],
|
1187
|
+
statement_params: Dict[str, str]
|
1165
1188
|
) -> float:
|
1166
1189
|
import cloudpickle as cp
|
1167
1190
|
import numpy as np
|
@@ -1211,14 +1234,14 @@ class FeatureAgglomeration(BaseTransformer):
|
|
1211
1234
|
api_calls=[Session.call],
|
1212
1235
|
custom_tags=dict([("autogen", True)]),
|
1213
1236
|
)
|
1214
|
-
score =
|
1215
|
-
|
1237
|
+
score = score_wrapper_sproc(
|
1238
|
+
session,
|
1216
1239
|
query,
|
1217
1240
|
stage_score_file_name,
|
1218
1241
|
identifier.get_unescaped_names(self.input_cols),
|
1219
1242
|
identifier.get_unescaped_names(self.label_cols),
|
1220
1243
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1221
|
-
statement_params
|
1244
|
+
statement_params,
|
1222
1245
|
)
|
1223
1246
|
|
1224
1247
|
cleanup_temp_files([local_score_file_name])
|
@@ -1236,18 +1259,20 @@ class FeatureAgglomeration(BaseTransformer):
|
|
1236
1259
|
if self._sklearn_object._estimator_type == 'classifier':
|
1237
1260
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1238
1261
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1239
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1262
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1263
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1240
1264
|
# For regressor, the type of predict is float64
|
1241
1265
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1242
1266
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1243
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1244
|
-
|
1267
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1268
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1245
1269
|
for prob_func in PROB_FUNCTIONS:
|
1246
1270
|
if hasattr(self, prob_func):
|
1247
1271
|
output_cols_prefix: str = f"{prob_func}_"
|
1248
1272
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1249
1273
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1250
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1274
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1275
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1251
1276
|
|
1252
1277
|
@property
|
1253
1278
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -258,7 +260,6 @@ class KMeans(BaseTransformer):
|
|
258
260
|
sample_weight_col: Optional[str] = None,
|
259
261
|
) -> None:
|
260
262
|
super().__init__()
|
261
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
262
263
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
263
264
|
|
264
265
|
self._deps = list(deps)
|
@@ -286,6 +287,15 @@ class KMeans(BaseTransformer):
|
|
286
287
|
self.set_drop_input_cols(drop_input_cols)
|
287
288
|
self.set_sample_weight_col(sample_weight_col)
|
288
289
|
|
290
|
+
def _get_rand_id(self) -> str:
|
291
|
+
"""
|
292
|
+
Generate random id to be used in sproc and stage names.
|
293
|
+
|
294
|
+
Returns:
|
295
|
+
Random id string usable in sproc, table, and stage names.
|
296
|
+
"""
|
297
|
+
return str(uuid4()).replace("-", "_").upper()
|
298
|
+
|
289
299
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
290
300
|
"""
|
291
301
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -364,7 +374,7 @@ class KMeans(BaseTransformer):
|
|
364
374
|
cp.dump(self._sklearn_object, local_transform_file)
|
365
375
|
|
366
376
|
# Create temp stage to run fit.
|
367
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
377
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
368
378
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
369
379
|
SqlResultValidator(
|
370
380
|
session=session,
|
@@ -377,11 +387,12 @@ class KMeans(BaseTransformer):
|
|
377
387
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
378
388
|
).validate()
|
379
389
|
|
380
|
-
|
390
|
+
# Use posixpath to construct stage paths
|
391
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
392
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
381
393
|
local_result_file_name = get_temp_file_path()
|
382
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
383
394
|
|
384
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
395
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
385
396
|
statement_params = telemetry.get_function_usage_statement_params(
|
386
397
|
project=_PROJECT,
|
387
398
|
subproject=_SUBPROJECT,
|
@@ -407,6 +418,7 @@ class KMeans(BaseTransformer):
|
|
407
418
|
replace=True,
|
408
419
|
session=session,
|
409
420
|
statement_params=statement_params,
|
421
|
+
anonymous=True
|
410
422
|
)
|
411
423
|
def fit_wrapper_sproc(
|
412
424
|
session: Session,
|
@@ -415,7 +427,8 @@ class KMeans(BaseTransformer):
|
|
415
427
|
stage_result_file_name: str,
|
416
428
|
input_cols: List[str],
|
417
429
|
label_cols: List[str],
|
418
|
-
sample_weight_col: Optional[str]
|
430
|
+
sample_weight_col: Optional[str],
|
431
|
+
statement_params: Dict[str, str]
|
419
432
|
) -> str:
|
420
433
|
import cloudpickle as cp
|
421
434
|
import numpy as np
|
@@ -482,15 +495,15 @@ class KMeans(BaseTransformer):
|
|
482
495
|
api_calls=[Session.call],
|
483
496
|
custom_tags=dict([("autogen", True)]),
|
484
497
|
)
|
485
|
-
sproc_export_file_name =
|
486
|
-
|
498
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
499
|
+
session,
|
487
500
|
query,
|
488
501
|
stage_transform_file_name,
|
489
502
|
stage_result_file_name,
|
490
503
|
identifier.get_unescaped_names(self.input_cols),
|
491
504
|
identifier.get_unescaped_names(self.label_cols),
|
492
505
|
identifier.get_unescaped_names(self.sample_weight_col),
|
493
|
-
statement_params
|
506
|
+
statement_params,
|
494
507
|
)
|
495
508
|
|
496
509
|
if "|" in sproc_export_file_name:
|
@@ -500,7 +513,7 @@ class KMeans(BaseTransformer):
|
|
500
513
|
print("\n".join(fields[1:]))
|
501
514
|
|
502
515
|
session.file.get(
|
503
|
-
|
516
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
504
517
|
local_result_file_name,
|
505
518
|
statement_params=statement_params
|
506
519
|
)
|
@@ -546,7 +559,7 @@ class KMeans(BaseTransformer):
|
|
546
559
|
|
547
560
|
# Register vectorized UDF for batch inference
|
548
561
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
549
|
-
safe_id=self.id, method=inference_method)
|
562
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
550
563
|
|
551
564
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
552
565
|
# will try to pickle all of self which fails.
|
@@ -638,7 +651,7 @@ class KMeans(BaseTransformer):
|
|
638
651
|
return transformed_pandas_df.to_dict("records")
|
639
652
|
|
640
653
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
641
|
-
safe_id=self.id
|
654
|
+
safe_id=self._get_rand_id()
|
642
655
|
)
|
643
656
|
|
644
657
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -805,11 +818,18 @@ class KMeans(BaseTransformer):
|
|
805
818
|
Transformed dataset.
|
806
819
|
"""
|
807
820
|
if isinstance(dataset, DataFrame):
|
821
|
+
expected_type_inferred = ""
|
822
|
+
# when it is classifier, infer the datatype from label columns
|
823
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
824
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
825
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
826
|
+
)
|
827
|
+
|
808
828
|
output_df = self._batch_inference(
|
809
829
|
dataset=dataset,
|
810
830
|
inference_method="predict",
|
811
831
|
expected_output_cols_list=self.output_cols,
|
812
|
-
expected_output_cols_type=
|
832
|
+
expected_output_cols_type=expected_type_inferred,
|
813
833
|
)
|
814
834
|
elif isinstance(dataset, pd.DataFrame):
|
815
835
|
output_df = self._sklearn_inference(
|
@@ -882,10 +902,10 @@ class KMeans(BaseTransformer):
|
|
882
902
|
|
883
903
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
884
904
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
885
|
-
Returns
|
905
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
886
906
|
"""
|
887
907
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
888
|
-
return []
|
908
|
+
return [output_cols_prefix]
|
889
909
|
|
890
910
|
classes = self._sklearn_object.classes_
|
891
911
|
if isinstance(classes, numpy.ndarray):
|
@@ -1110,7 +1130,7 @@ class KMeans(BaseTransformer):
|
|
1110
1130
|
cp.dump(self._sklearn_object, local_score_file)
|
1111
1131
|
|
1112
1132
|
# Create temp stage to run score.
|
1113
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1133
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1114
1134
|
session = dataset._session
|
1115
1135
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1116
1136
|
SqlResultValidator(
|
@@ -1124,8 +1144,9 @@ class KMeans(BaseTransformer):
|
|
1124
1144
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1125
1145
|
).validate()
|
1126
1146
|
|
1127
|
-
|
1128
|
-
|
1147
|
+
# Use posixpath to construct stage paths
|
1148
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1149
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1129
1150
|
statement_params = telemetry.get_function_usage_statement_params(
|
1130
1151
|
project=_PROJECT,
|
1131
1152
|
subproject=_SUBPROJECT,
|
@@ -1151,6 +1172,7 @@ class KMeans(BaseTransformer):
|
|
1151
1172
|
replace=True,
|
1152
1173
|
session=session,
|
1153
1174
|
statement_params=statement_params,
|
1175
|
+
anonymous=True
|
1154
1176
|
)
|
1155
1177
|
def score_wrapper_sproc(
|
1156
1178
|
session: Session,
|
@@ -1158,7 +1180,8 @@ class KMeans(BaseTransformer):
|
|
1158
1180
|
stage_score_file_name: str,
|
1159
1181
|
input_cols: List[str],
|
1160
1182
|
label_cols: List[str],
|
1161
|
-
sample_weight_col: Optional[str]
|
1183
|
+
sample_weight_col: Optional[str],
|
1184
|
+
statement_params: Dict[str, str]
|
1162
1185
|
) -> float:
|
1163
1186
|
import cloudpickle as cp
|
1164
1187
|
import numpy as np
|
@@ -1208,14 +1231,14 @@ class KMeans(BaseTransformer):
|
|
1208
1231
|
api_calls=[Session.call],
|
1209
1232
|
custom_tags=dict([("autogen", True)]),
|
1210
1233
|
)
|
1211
|
-
score =
|
1212
|
-
|
1234
|
+
score = score_wrapper_sproc(
|
1235
|
+
session,
|
1213
1236
|
query,
|
1214
1237
|
stage_score_file_name,
|
1215
1238
|
identifier.get_unescaped_names(self.input_cols),
|
1216
1239
|
identifier.get_unescaped_names(self.label_cols),
|
1217
1240
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1218
|
-
statement_params
|
1241
|
+
statement_params,
|
1219
1242
|
)
|
1220
1243
|
|
1221
1244
|
cleanup_temp_files([local_score_file_name])
|
@@ -1233,18 +1256,20 @@ class KMeans(BaseTransformer):
|
|
1233
1256
|
if self._sklearn_object._estimator_type == 'classifier':
|
1234
1257
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1235
1258
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1236
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1259
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1260
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1237
1261
|
# For regressor, the type of predict is float64
|
1238
1262
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1239
1263
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1240
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1241
|
-
|
1264
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1265
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1242
1266
|
for prob_func in PROB_FUNCTIONS:
|
1243
1267
|
if hasattr(self, prob_func):
|
1244
1268
|
output_cols_prefix: str = f"{prob_func}_"
|
1245
1269
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1246
1270
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1247
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1271
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1272
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1248
1273
|
|
1249
1274
|
@property
|
1250
1275
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|