snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -298,7 +300,6 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
298
300
|
sample_weight_col: Optional[str] = None,
|
299
301
|
) -> None:
|
300
302
|
super().__init__()
|
301
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
302
303
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
303
304
|
|
304
305
|
self._deps = list(deps)
|
@@ -334,6 +335,15 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
334
335
|
self.set_drop_input_cols(drop_input_cols)
|
335
336
|
self.set_sample_weight_col(sample_weight_col)
|
336
337
|
|
338
|
+
def _get_rand_id(self) -> str:
|
339
|
+
"""
|
340
|
+
Generate random id to be used in sproc and stage names.
|
341
|
+
|
342
|
+
Returns:
|
343
|
+
Random id string usable in sproc, table, and stage names.
|
344
|
+
"""
|
345
|
+
return str(uuid4()).replace("-", "_").upper()
|
346
|
+
|
337
347
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
338
348
|
"""
|
339
349
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -412,7 +422,7 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
412
422
|
cp.dump(self._sklearn_object, local_transform_file)
|
413
423
|
|
414
424
|
# Create temp stage to run fit.
|
415
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
425
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
416
426
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
417
427
|
SqlResultValidator(
|
418
428
|
session=session,
|
@@ -425,11 +435,12 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
425
435
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
426
436
|
).validate()
|
427
437
|
|
428
|
-
|
438
|
+
# Use posixpath to construct stage paths
|
439
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
440
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
429
441
|
local_result_file_name = get_temp_file_path()
|
430
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
431
442
|
|
432
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
443
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
433
444
|
statement_params = telemetry.get_function_usage_statement_params(
|
434
445
|
project=_PROJECT,
|
435
446
|
subproject=_SUBPROJECT,
|
@@ -455,6 +466,7 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
455
466
|
replace=True,
|
456
467
|
session=session,
|
457
468
|
statement_params=statement_params,
|
469
|
+
anonymous=True
|
458
470
|
)
|
459
471
|
def fit_wrapper_sproc(
|
460
472
|
session: Session,
|
@@ -463,7 +475,8 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
463
475
|
stage_result_file_name: str,
|
464
476
|
input_cols: List[str],
|
465
477
|
label_cols: List[str],
|
466
|
-
sample_weight_col: Optional[str]
|
478
|
+
sample_weight_col: Optional[str],
|
479
|
+
statement_params: Dict[str, str]
|
467
480
|
) -> str:
|
468
481
|
import cloudpickle as cp
|
469
482
|
import numpy as np
|
@@ -530,15 +543,15 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
530
543
|
api_calls=[Session.call],
|
531
544
|
custom_tags=dict([("autogen", True)]),
|
532
545
|
)
|
533
|
-
sproc_export_file_name =
|
534
|
-
|
546
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
547
|
+
session,
|
535
548
|
query,
|
536
549
|
stage_transform_file_name,
|
537
550
|
stage_result_file_name,
|
538
551
|
identifier.get_unescaped_names(self.input_cols),
|
539
552
|
identifier.get_unescaped_names(self.label_cols),
|
540
553
|
identifier.get_unescaped_names(self.sample_weight_col),
|
541
|
-
statement_params
|
554
|
+
statement_params,
|
542
555
|
)
|
543
556
|
|
544
557
|
if "|" in sproc_export_file_name:
|
@@ -548,7 +561,7 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
548
561
|
print("\n".join(fields[1:]))
|
549
562
|
|
550
563
|
session.file.get(
|
551
|
-
|
564
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
552
565
|
local_result_file_name,
|
553
566
|
statement_params=statement_params
|
554
567
|
)
|
@@ -594,7 +607,7 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
594
607
|
|
595
608
|
# Register vectorized UDF for batch inference
|
596
609
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
597
|
-
safe_id=self.
|
610
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
598
611
|
|
599
612
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
600
613
|
# will try to pickle all of self which fails.
|
@@ -686,7 +699,7 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
686
699
|
return transformed_pandas_df.to_dict("records")
|
687
700
|
|
688
701
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
689
|
-
safe_id=self.
|
702
|
+
safe_id=self._get_rand_id()
|
690
703
|
)
|
691
704
|
|
692
705
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -853,11 +866,18 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
853
866
|
Transformed dataset.
|
854
867
|
"""
|
855
868
|
if isinstance(dataset, DataFrame):
|
869
|
+
expected_type_inferred = ""
|
870
|
+
# when it is classifier, infer the datatype from label columns
|
871
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
872
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
873
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
874
|
+
)
|
875
|
+
|
856
876
|
output_df = self._batch_inference(
|
857
877
|
dataset=dataset,
|
858
878
|
inference_method="predict",
|
859
879
|
expected_output_cols_list=self.output_cols,
|
860
|
-
expected_output_cols_type=
|
880
|
+
expected_output_cols_type=expected_type_inferred,
|
861
881
|
)
|
862
882
|
elif isinstance(dataset, pd.DataFrame):
|
863
883
|
output_df = self._sklearn_inference(
|
@@ -928,10 +948,10 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
928
948
|
|
929
949
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
930
950
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
931
|
-
Returns
|
951
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
932
952
|
"""
|
933
953
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
934
|
-
return []
|
954
|
+
return [output_cols_prefix]
|
935
955
|
|
936
956
|
classes = self._sklearn_object.classes_
|
937
957
|
if isinstance(classes, numpy.ndarray):
|
@@ -1160,7 +1180,7 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
1160
1180
|
cp.dump(self._sklearn_object, local_score_file)
|
1161
1181
|
|
1162
1182
|
# Create temp stage to run score.
|
1163
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1183
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1164
1184
|
session = dataset._session
|
1165
1185
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1166
1186
|
SqlResultValidator(
|
@@ -1174,8 +1194,9 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
1174
1194
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1175
1195
|
).validate()
|
1176
1196
|
|
1177
|
-
|
1178
|
-
|
1197
|
+
# Use posixpath to construct stage paths
|
1198
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1199
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1179
1200
|
statement_params = telemetry.get_function_usage_statement_params(
|
1180
1201
|
project=_PROJECT,
|
1181
1202
|
subproject=_SUBPROJECT,
|
@@ -1201,6 +1222,7 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
1201
1222
|
replace=True,
|
1202
1223
|
session=session,
|
1203
1224
|
statement_params=statement_params,
|
1225
|
+
anonymous=True
|
1204
1226
|
)
|
1205
1227
|
def score_wrapper_sproc(
|
1206
1228
|
session: Session,
|
@@ -1208,7 +1230,8 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
1208
1230
|
stage_score_file_name: str,
|
1209
1231
|
input_cols: List[str],
|
1210
1232
|
label_cols: List[str],
|
1211
|
-
sample_weight_col: Optional[str]
|
1233
|
+
sample_weight_col: Optional[str],
|
1234
|
+
statement_params: Dict[str, str]
|
1212
1235
|
) -> float:
|
1213
1236
|
import cloudpickle as cp
|
1214
1237
|
import numpy as np
|
@@ -1258,14 +1281,14 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
1258
1281
|
api_calls=[Session.call],
|
1259
1282
|
custom_tags=dict([("autogen", True)]),
|
1260
1283
|
)
|
1261
|
-
score =
|
1262
|
-
|
1284
|
+
score = score_wrapper_sproc(
|
1285
|
+
session,
|
1263
1286
|
query,
|
1264
1287
|
stage_score_file_name,
|
1265
1288
|
identifier.get_unescaped_names(self.input_cols),
|
1266
1289
|
identifier.get_unescaped_names(self.label_cols),
|
1267
1290
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1268
|
-
statement_params
|
1291
|
+
statement_params,
|
1269
1292
|
)
|
1270
1293
|
|
1271
1294
|
cleanup_temp_files([local_score_file_name])
|
@@ -1283,18 +1306,20 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
1283
1306
|
if self._sklearn_object._estimator_type == 'classifier':
|
1284
1307
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1285
1308
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1286
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1309
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1310
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1287
1311
|
# For regressor, the type of predict is float64
|
1288
1312
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1289
1313
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1290
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1291
|
-
|
1314
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1315
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1292
1316
|
for prob_func in PROB_FUNCTIONS:
|
1293
1317
|
if hasattr(self, prob_func):
|
1294
1318
|
output_cols_prefix: str = f"{prob_func}_"
|
1295
1319
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1296
1320
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1297
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1321
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1322
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1298
1323
|
|
1299
1324
|
@property
|
1300
1325
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -274,7 +276,6 @@ class GaussianMixture(BaseTransformer):
|
|
274
276
|
sample_weight_col: Optional[str] = None,
|
275
277
|
) -> None:
|
276
278
|
super().__init__()
|
277
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
278
279
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
279
280
|
|
280
281
|
self._deps = list(deps)
|
@@ -307,6 +308,15 @@ class GaussianMixture(BaseTransformer):
|
|
307
308
|
self.set_drop_input_cols(drop_input_cols)
|
308
309
|
self.set_sample_weight_col(sample_weight_col)
|
309
310
|
|
311
|
+
def _get_rand_id(self) -> str:
|
312
|
+
"""
|
313
|
+
Generate random id to be used in sproc and stage names.
|
314
|
+
|
315
|
+
Returns:
|
316
|
+
Random id string usable in sproc, table, and stage names.
|
317
|
+
"""
|
318
|
+
return str(uuid4()).replace("-", "_").upper()
|
319
|
+
|
310
320
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
311
321
|
"""
|
312
322
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -385,7 +395,7 @@ class GaussianMixture(BaseTransformer):
|
|
385
395
|
cp.dump(self._sklearn_object, local_transform_file)
|
386
396
|
|
387
397
|
# Create temp stage to run fit.
|
388
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
398
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
389
399
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
390
400
|
SqlResultValidator(
|
391
401
|
session=session,
|
@@ -398,11 +408,12 @@ class GaussianMixture(BaseTransformer):
|
|
398
408
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
399
409
|
).validate()
|
400
410
|
|
401
|
-
|
411
|
+
# Use posixpath to construct stage paths
|
412
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
413
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
402
414
|
local_result_file_name = get_temp_file_path()
|
403
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
404
415
|
|
405
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
416
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
406
417
|
statement_params = telemetry.get_function_usage_statement_params(
|
407
418
|
project=_PROJECT,
|
408
419
|
subproject=_SUBPROJECT,
|
@@ -428,6 +439,7 @@ class GaussianMixture(BaseTransformer):
|
|
428
439
|
replace=True,
|
429
440
|
session=session,
|
430
441
|
statement_params=statement_params,
|
442
|
+
anonymous=True
|
431
443
|
)
|
432
444
|
def fit_wrapper_sproc(
|
433
445
|
session: Session,
|
@@ -436,7 +448,8 @@ class GaussianMixture(BaseTransformer):
|
|
436
448
|
stage_result_file_name: str,
|
437
449
|
input_cols: List[str],
|
438
450
|
label_cols: List[str],
|
439
|
-
sample_weight_col: Optional[str]
|
451
|
+
sample_weight_col: Optional[str],
|
452
|
+
statement_params: Dict[str, str]
|
440
453
|
) -> str:
|
441
454
|
import cloudpickle as cp
|
442
455
|
import numpy as np
|
@@ -503,15 +516,15 @@ class GaussianMixture(BaseTransformer):
|
|
503
516
|
api_calls=[Session.call],
|
504
517
|
custom_tags=dict([("autogen", True)]),
|
505
518
|
)
|
506
|
-
sproc_export_file_name =
|
507
|
-
|
519
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
520
|
+
session,
|
508
521
|
query,
|
509
522
|
stage_transform_file_name,
|
510
523
|
stage_result_file_name,
|
511
524
|
identifier.get_unescaped_names(self.input_cols),
|
512
525
|
identifier.get_unescaped_names(self.label_cols),
|
513
526
|
identifier.get_unescaped_names(self.sample_weight_col),
|
514
|
-
statement_params
|
527
|
+
statement_params,
|
515
528
|
)
|
516
529
|
|
517
530
|
if "|" in sproc_export_file_name:
|
@@ -521,7 +534,7 @@ class GaussianMixture(BaseTransformer):
|
|
521
534
|
print("\n".join(fields[1:]))
|
522
535
|
|
523
536
|
session.file.get(
|
524
|
-
|
537
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
525
538
|
local_result_file_name,
|
526
539
|
statement_params=statement_params
|
527
540
|
)
|
@@ -567,7 +580,7 @@ class GaussianMixture(BaseTransformer):
|
|
567
580
|
|
568
581
|
# Register vectorized UDF for batch inference
|
569
582
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
570
|
-
safe_id=self.
|
583
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
571
584
|
|
572
585
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
573
586
|
# will try to pickle all of self which fails.
|
@@ -659,7 +672,7 @@ class GaussianMixture(BaseTransformer):
|
|
659
672
|
return transformed_pandas_df.to_dict("records")
|
660
673
|
|
661
674
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
662
|
-
safe_id=self.
|
675
|
+
safe_id=self._get_rand_id()
|
663
676
|
)
|
664
677
|
|
665
678
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -826,11 +839,18 @@ class GaussianMixture(BaseTransformer):
|
|
826
839
|
Transformed dataset.
|
827
840
|
"""
|
828
841
|
if isinstance(dataset, DataFrame):
|
842
|
+
expected_type_inferred = ""
|
843
|
+
# when it is classifier, infer the datatype from label columns
|
844
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
845
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
846
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
847
|
+
)
|
848
|
+
|
829
849
|
output_df = self._batch_inference(
|
830
850
|
dataset=dataset,
|
831
851
|
inference_method="predict",
|
832
852
|
expected_output_cols_list=self.output_cols,
|
833
|
-
expected_output_cols_type=
|
853
|
+
expected_output_cols_type=expected_type_inferred,
|
834
854
|
)
|
835
855
|
elif isinstance(dataset, pd.DataFrame):
|
836
856
|
output_df = self._sklearn_inference(
|
@@ -901,10 +921,10 @@ class GaussianMixture(BaseTransformer):
|
|
901
921
|
|
902
922
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
903
923
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
904
|
-
Returns
|
924
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
905
925
|
"""
|
906
926
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
907
|
-
return []
|
927
|
+
return [output_cols_prefix]
|
908
928
|
|
909
929
|
classes = self._sklearn_object.classes_
|
910
930
|
if isinstance(classes, numpy.ndarray):
|
@@ -1133,7 +1153,7 @@ class GaussianMixture(BaseTransformer):
|
|
1133
1153
|
cp.dump(self._sklearn_object, local_score_file)
|
1134
1154
|
|
1135
1155
|
# Create temp stage to run score.
|
1136
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1156
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1137
1157
|
session = dataset._session
|
1138
1158
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1139
1159
|
SqlResultValidator(
|
@@ -1147,8 +1167,9 @@ class GaussianMixture(BaseTransformer):
|
|
1147
1167
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1148
1168
|
).validate()
|
1149
1169
|
|
1150
|
-
|
1151
|
-
|
1170
|
+
# Use posixpath to construct stage paths
|
1171
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1172
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1152
1173
|
statement_params = telemetry.get_function_usage_statement_params(
|
1153
1174
|
project=_PROJECT,
|
1154
1175
|
subproject=_SUBPROJECT,
|
@@ -1174,6 +1195,7 @@ class GaussianMixture(BaseTransformer):
|
|
1174
1195
|
replace=True,
|
1175
1196
|
session=session,
|
1176
1197
|
statement_params=statement_params,
|
1198
|
+
anonymous=True
|
1177
1199
|
)
|
1178
1200
|
def score_wrapper_sproc(
|
1179
1201
|
session: Session,
|
@@ -1181,7 +1203,8 @@ class GaussianMixture(BaseTransformer):
|
|
1181
1203
|
stage_score_file_name: str,
|
1182
1204
|
input_cols: List[str],
|
1183
1205
|
label_cols: List[str],
|
1184
|
-
sample_weight_col: Optional[str]
|
1206
|
+
sample_weight_col: Optional[str],
|
1207
|
+
statement_params: Dict[str, str]
|
1185
1208
|
) -> float:
|
1186
1209
|
import cloudpickle as cp
|
1187
1210
|
import numpy as np
|
@@ -1231,14 +1254,14 @@ class GaussianMixture(BaseTransformer):
|
|
1231
1254
|
api_calls=[Session.call],
|
1232
1255
|
custom_tags=dict([("autogen", True)]),
|
1233
1256
|
)
|
1234
|
-
score =
|
1235
|
-
|
1257
|
+
score = score_wrapper_sproc(
|
1258
|
+
session,
|
1236
1259
|
query,
|
1237
1260
|
stage_score_file_name,
|
1238
1261
|
identifier.get_unescaped_names(self.input_cols),
|
1239
1262
|
identifier.get_unescaped_names(self.label_cols),
|
1240
1263
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1241
|
-
statement_params
|
1264
|
+
statement_params,
|
1242
1265
|
)
|
1243
1266
|
|
1244
1267
|
cleanup_temp_files([local_score_file_name])
|
@@ -1256,18 +1279,20 @@ class GaussianMixture(BaseTransformer):
|
|
1256
1279
|
if self._sklearn_object._estimator_type == 'classifier':
|
1257
1280
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1258
1281
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1259
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1282
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1283
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1260
1284
|
# For regressor, the type of predict is float64
|
1261
1285
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1262
1286
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1263
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1264
|
-
|
1287
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1288
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1265
1289
|
for prob_func in PROB_FUNCTIONS:
|
1266
1290
|
if hasattr(self, prob_func):
|
1267
1291
|
output_cols_prefix: str = f"{prob_func}_"
|
1268
1292
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1269
1293
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1270
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1294
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1295
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1271
1296
|
|
1272
1297
|
@property
|
1273
1298
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|