snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -219,7 +221,6 @@ class VotingClassifier(BaseTransformer):
|
|
219
221
|
sample_weight_col: Optional[str] = None,
|
220
222
|
) -> None:
|
221
223
|
super().__init__()
|
222
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
223
224
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
224
225
|
deps = deps | _gather_dependencies(estimators)
|
225
226
|
self._deps = list(deps)
|
@@ -244,6 +245,15 @@ class VotingClassifier(BaseTransformer):
|
|
244
245
|
self.set_drop_input_cols(drop_input_cols)
|
245
246
|
self.set_sample_weight_col(sample_weight_col)
|
246
247
|
|
248
|
+
def _get_rand_id(self) -> str:
|
249
|
+
"""
|
250
|
+
Generate random id to be used in sproc and stage names.
|
251
|
+
|
252
|
+
Returns:
|
253
|
+
Random id string usable in sproc, table, and stage names.
|
254
|
+
"""
|
255
|
+
return str(uuid4()).replace("-", "_").upper()
|
256
|
+
|
247
257
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
248
258
|
"""
|
249
259
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -322,7 +332,7 @@ class VotingClassifier(BaseTransformer):
|
|
322
332
|
cp.dump(self._sklearn_object, local_transform_file)
|
323
333
|
|
324
334
|
# Create temp stage to run fit.
|
325
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
335
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
326
336
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
327
337
|
SqlResultValidator(
|
328
338
|
session=session,
|
@@ -335,11 +345,12 @@ class VotingClassifier(BaseTransformer):
|
|
335
345
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
336
346
|
).validate()
|
337
347
|
|
338
|
-
|
348
|
+
# Use posixpath to construct stage paths
|
349
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
350
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
339
351
|
local_result_file_name = get_temp_file_path()
|
340
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
341
352
|
|
342
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
353
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
343
354
|
statement_params = telemetry.get_function_usage_statement_params(
|
344
355
|
project=_PROJECT,
|
345
356
|
subproject=_SUBPROJECT,
|
@@ -365,6 +376,7 @@ class VotingClassifier(BaseTransformer):
|
|
365
376
|
replace=True,
|
366
377
|
session=session,
|
367
378
|
statement_params=statement_params,
|
379
|
+
anonymous=True
|
368
380
|
)
|
369
381
|
def fit_wrapper_sproc(
|
370
382
|
session: Session,
|
@@ -373,7 +385,8 @@ class VotingClassifier(BaseTransformer):
|
|
373
385
|
stage_result_file_name: str,
|
374
386
|
input_cols: List[str],
|
375
387
|
label_cols: List[str],
|
376
|
-
sample_weight_col: Optional[str]
|
388
|
+
sample_weight_col: Optional[str],
|
389
|
+
statement_params: Dict[str, str]
|
377
390
|
) -> str:
|
378
391
|
import cloudpickle as cp
|
379
392
|
import numpy as np
|
@@ -440,15 +453,15 @@ class VotingClassifier(BaseTransformer):
|
|
440
453
|
api_calls=[Session.call],
|
441
454
|
custom_tags=dict([("autogen", True)]),
|
442
455
|
)
|
443
|
-
sproc_export_file_name =
|
444
|
-
|
456
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
457
|
+
session,
|
445
458
|
query,
|
446
459
|
stage_transform_file_name,
|
447
460
|
stage_result_file_name,
|
448
461
|
identifier.get_unescaped_names(self.input_cols),
|
449
462
|
identifier.get_unescaped_names(self.label_cols),
|
450
463
|
identifier.get_unescaped_names(self.sample_weight_col),
|
451
|
-
statement_params
|
464
|
+
statement_params,
|
452
465
|
)
|
453
466
|
|
454
467
|
if "|" in sproc_export_file_name:
|
@@ -458,7 +471,7 @@ class VotingClassifier(BaseTransformer):
|
|
458
471
|
print("\n".join(fields[1:]))
|
459
472
|
|
460
473
|
session.file.get(
|
461
|
-
|
474
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
462
475
|
local_result_file_name,
|
463
476
|
statement_params=statement_params
|
464
477
|
)
|
@@ -504,7 +517,7 @@ class VotingClassifier(BaseTransformer):
|
|
504
517
|
|
505
518
|
# Register vectorized UDF for batch inference
|
506
519
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
507
|
-
safe_id=self.id, method=inference_method)
|
520
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
508
521
|
|
509
522
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
510
523
|
# will try to pickle all of self which fails.
|
@@ -596,7 +609,7 @@ class VotingClassifier(BaseTransformer):
|
|
596
609
|
return transformed_pandas_df.to_dict("records")
|
597
610
|
|
598
611
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
599
|
-
safe_id=self.id
|
612
|
+
safe_id=self._get_rand_id()
|
600
613
|
)
|
601
614
|
|
602
615
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -763,11 +776,18 @@ class VotingClassifier(BaseTransformer):
|
|
763
776
|
Transformed dataset.
|
764
777
|
"""
|
765
778
|
if isinstance(dataset, DataFrame):
|
779
|
+
expected_type_inferred = ""
|
780
|
+
# when it is classifier, infer the datatype from label columns
|
781
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
782
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
783
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
784
|
+
)
|
785
|
+
|
766
786
|
output_df = self._batch_inference(
|
767
787
|
dataset=dataset,
|
768
788
|
inference_method="predict",
|
769
789
|
expected_output_cols_list=self.output_cols,
|
770
|
-
expected_output_cols_type=
|
790
|
+
expected_output_cols_type=expected_type_inferred,
|
771
791
|
)
|
772
792
|
elif isinstance(dataset, pd.DataFrame):
|
773
793
|
output_df = self._sklearn_inference(
|
@@ -840,10 +860,10 @@ class VotingClassifier(BaseTransformer):
|
|
840
860
|
|
841
861
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
842
862
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
843
|
-
Returns
|
863
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
844
864
|
"""
|
845
865
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
846
|
-
return []
|
866
|
+
return [output_cols_prefix]
|
847
867
|
|
848
868
|
classes = self._sklearn_object.classes_
|
849
869
|
if isinstance(classes, numpy.ndarray):
|
@@ -1072,7 +1092,7 @@ class VotingClassifier(BaseTransformer):
|
|
1072
1092
|
cp.dump(self._sklearn_object, local_score_file)
|
1073
1093
|
|
1074
1094
|
# Create temp stage to run score.
|
1075
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1095
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1076
1096
|
session = dataset._session
|
1077
1097
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1078
1098
|
SqlResultValidator(
|
@@ -1086,8 +1106,9 @@ class VotingClassifier(BaseTransformer):
|
|
1086
1106
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1087
1107
|
).validate()
|
1088
1108
|
|
1089
|
-
|
1090
|
-
|
1109
|
+
# Use posixpath to construct stage paths
|
1110
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1111
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1091
1112
|
statement_params = telemetry.get_function_usage_statement_params(
|
1092
1113
|
project=_PROJECT,
|
1093
1114
|
subproject=_SUBPROJECT,
|
@@ -1113,6 +1134,7 @@ class VotingClassifier(BaseTransformer):
|
|
1113
1134
|
replace=True,
|
1114
1135
|
session=session,
|
1115
1136
|
statement_params=statement_params,
|
1137
|
+
anonymous=True
|
1116
1138
|
)
|
1117
1139
|
def score_wrapper_sproc(
|
1118
1140
|
session: Session,
|
@@ -1120,7 +1142,8 @@ class VotingClassifier(BaseTransformer):
|
|
1120
1142
|
stage_score_file_name: str,
|
1121
1143
|
input_cols: List[str],
|
1122
1144
|
label_cols: List[str],
|
1123
|
-
sample_weight_col: Optional[str]
|
1145
|
+
sample_weight_col: Optional[str],
|
1146
|
+
statement_params: Dict[str, str]
|
1124
1147
|
) -> float:
|
1125
1148
|
import cloudpickle as cp
|
1126
1149
|
import numpy as np
|
@@ -1170,14 +1193,14 @@ class VotingClassifier(BaseTransformer):
|
|
1170
1193
|
api_calls=[Session.call],
|
1171
1194
|
custom_tags=dict([("autogen", True)]),
|
1172
1195
|
)
|
1173
|
-
score =
|
1174
|
-
|
1196
|
+
score = score_wrapper_sproc(
|
1197
|
+
session,
|
1175
1198
|
query,
|
1176
1199
|
stage_score_file_name,
|
1177
1200
|
identifier.get_unescaped_names(self.input_cols),
|
1178
1201
|
identifier.get_unescaped_names(self.label_cols),
|
1179
1202
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1180
|
-
statement_params
|
1203
|
+
statement_params,
|
1181
1204
|
)
|
1182
1205
|
|
1183
1206
|
cleanup_temp_files([local_score_file_name])
|
@@ -1195,18 +1218,20 @@ class VotingClassifier(BaseTransformer):
|
|
1195
1218
|
if self._sklearn_object._estimator_type == 'classifier':
|
1196
1219
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1197
1220
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1198
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
|
1221
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1222
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1199
1223
|
# For regressor, the type of predict is float64
|
1200
1224
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1201
1225
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1202
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
|
1203
|
-
|
1226
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1227
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1204
1228
|
for prob_func in PROB_FUNCTIONS:
|
1205
1229
|
if hasattr(self, prob_func):
|
1206
1230
|
output_cols_prefix: str = f"{prob_func}_"
|
1207
1231
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1208
1232
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1209
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
|
1233
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1234
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1210
1235
|
|
1211
1236
|
@property
|
1212
1237
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -203,7 +205,6 @@ class VotingRegressor(BaseTransformer):
|
|
203
205
|
sample_weight_col: Optional[str] = None,
|
204
206
|
) -> None:
|
205
207
|
super().__init__()
|
206
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
207
208
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
208
209
|
deps = deps | _gather_dependencies(estimators)
|
209
210
|
self._deps = list(deps)
|
@@ -226,6 +227,15 @@ class VotingRegressor(BaseTransformer):
|
|
226
227
|
self.set_drop_input_cols(drop_input_cols)
|
227
228
|
self.set_sample_weight_col(sample_weight_col)
|
228
229
|
|
230
|
+
def _get_rand_id(self) -> str:
|
231
|
+
"""
|
232
|
+
Generate random id to be used in sproc and stage names.
|
233
|
+
|
234
|
+
Returns:
|
235
|
+
Random id string usable in sproc, table, and stage names.
|
236
|
+
"""
|
237
|
+
return str(uuid4()).replace("-", "_").upper()
|
238
|
+
|
229
239
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
230
240
|
"""
|
231
241
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -304,7 +314,7 @@ class VotingRegressor(BaseTransformer):
|
|
304
314
|
cp.dump(self._sklearn_object, local_transform_file)
|
305
315
|
|
306
316
|
# Create temp stage to run fit.
|
307
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
317
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
308
318
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
309
319
|
SqlResultValidator(
|
310
320
|
session=session,
|
@@ -317,11 +327,12 @@ class VotingRegressor(BaseTransformer):
|
|
317
327
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
318
328
|
).validate()
|
319
329
|
|
320
|
-
|
330
|
+
# Use posixpath to construct stage paths
|
331
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
332
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
321
333
|
local_result_file_name = get_temp_file_path()
|
322
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
323
334
|
|
324
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
335
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
325
336
|
statement_params = telemetry.get_function_usage_statement_params(
|
326
337
|
project=_PROJECT,
|
327
338
|
subproject=_SUBPROJECT,
|
@@ -347,6 +358,7 @@ class VotingRegressor(BaseTransformer):
|
|
347
358
|
replace=True,
|
348
359
|
session=session,
|
349
360
|
statement_params=statement_params,
|
361
|
+
anonymous=True
|
350
362
|
)
|
351
363
|
def fit_wrapper_sproc(
|
352
364
|
session: Session,
|
@@ -355,7 +367,8 @@ class VotingRegressor(BaseTransformer):
|
|
355
367
|
stage_result_file_name: str,
|
356
368
|
input_cols: List[str],
|
357
369
|
label_cols: List[str],
|
358
|
-
sample_weight_col: Optional[str]
|
370
|
+
sample_weight_col: Optional[str],
|
371
|
+
statement_params: Dict[str, str]
|
359
372
|
) -> str:
|
360
373
|
import cloudpickle as cp
|
361
374
|
import numpy as np
|
@@ -422,15 +435,15 @@ class VotingRegressor(BaseTransformer):
|
|
422
435
|
api_calls=[Session.call],
|
423
436
|
custom_tags=dict([("autogen", True)]),
|
424
437
|
)
|
425
|
-
sproc_export_file_name =
|
426
|
-
|
438
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
439
|
+
session,
|
427
440
|
query,
|
428
441
|
stage_transform_file_name,
|
429
442
|
stage_result_file_name,
|
430
443
|
identifier.get_unescaped_names(self.input_cols),
|
431
444
|
identifier.get_unescaped_names(self.label_cols),
|
432
445
|
identifier.get_unescaped_names(self.sample_weight_col),
|
433
|
-
statement_params
|
446
|
+
statement_params,
|
434
447
|
)
|
435
448
|
|
436
449
|
if "|" in sproc_export_file_name:
|
@@ -440,7 +453,7 @@ class VotingRegressor(BaseTransformer):
|
|
440
453
|
print("\n".join(fields[1:]))
|
441
454
|
|
442
455
|
session.file.get(
|
443
|
-
|
456
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
444
457
|
local_result_file_name,
|
445
458
|
statement_params=statement_params
|
446
459
|
)
|
@@ -486,7 +499,7 @@ class VotingRegressor(BaseTransformer):
|
|
486
499
|
|
487
500
|
# Register vectorized UDF for batch inference
|
488
501
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
489
|
-
safe_id=self.id, method=inference_method)
|
502
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
490
503
|
|
491
504
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
492
505
|
# will try to pickle all of self which fails.
|
@@ -578,7 +591,7 @@ class VotingRegressor(BaseTransformer):
|
|
578
591
|
return transformed_pandas_df.to_dict("records")
|
579
592
|
|
580
593
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
581
|
-
safe_id=self.id
|
594
|
+
safe_id=self._get_rand_id()
|
582
595
|
)
|
583
596
|
|
584
597
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -745,11 +758,18 @@ class VotingRegressor(BaseTransformer):
|
|
745
758
|
Transformed dataset.
|
746
759
|
"""
|
747
760
|
if isinstance(dataset, DataFrame):
|
761
|
+
expected_type_inferred = "float"
|
762
|
+
# when it is classifier, infer the datatype from label columns
|
763
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
764
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
765
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
766
|
+
)
|
767
|
+
|
748
768
|
output_df = self._batch_inference(
|
749
769
|
dataset=dataset,
|
750
770
|
inference_method="predict",
|
751
771
|
expected_output_cols_list=self.output_cols,
|
752
|
-
expected_output_cols_type=
|
772
|
+
expected_output_cols_type=expected_type_inferred,
|
753
773
|
)
|
754
774
|
elif isinstance(dataset, pd.DataFrame):
|
755
775
|
output_df = self._sklearn_inference(
|
@@ -822,10 +842,10 @@ class VotingRegressor(BaseTransformer):
|
|
822
842
|
|
823
843
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
824
844
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
825
|
-
Returns
|
845
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
826
846
|
"""
|
827
847
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
828
|
-
return []
|
848
|
+
return [output_cols_prefix]
|
829
849
|
|
830
850
|
classes = self._sklearn_object.classes_
|
831
851
|
if isinstance(classes, numpy.ndarray):
|
@@ -1050,7 +1070,7 @@ class VotingRegressor(BaseTransformer):
|
|
1050
1070
|
cp.dump(self._sklearn_object, local_score_file)
|
1051
1071
|
|
1052
1072
|
# Create temp stage to run score.
|
1053
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1073
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1054
1074
|
session = dataset._session
|
1055
1075
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1056
1076
|
SqlResultValidator(
|
@@ -1064,8 +1084,9 @@ class VotingRegressor(BaseTransformer):
|
|
1064
1084
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1065
1085
|
).validate()
|
1066
1086
|
|
1067
|
-
|
1068
|
-
|
1087
|
+
# Use posixpath to construct stage paths
|
1088
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1089
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1069
1090
|
statement_params = telemetry.get_function_usage_statement_params(
|
1070
1091
|
project=_PROJECT,
|
1071
1092
|
subproject=_SUBPROJECT,
|
@@ -1091,6 +1112,7 @@ class VotingRegressor(BaseTransformer):
|
|
1091
1112
|
replace=True,
|
1092
1113
|
session=session,
|
1093
1114
|
statement_params=statement_params,
|
1115
|
+
anonymous=True
|
1094
1116
|
)
|
1095
1117
|
def score_wrapper_sproc(
|
1096
1118
|
session: Session,
|
@@ -1098,7 +1120,8 @@ class VotingRegressor(BaseTransformer):
|
|
1098
1120
|
stage_score_file_name: str,
|
1099
1121
|
input_cols: List[str],
|
1100
1122
|
label_cols: List[str],
|
1101
|
-
sample_weight_col: Optional[str]
|
1123
|
+
sample_weight_col: Optional[str],
|
1124
|
+
statement_params: Dict[str, str]
|
1102
1125
|
) -> float:
|
1103
1126
|
import cloudpickle as cp
|
1104
1127
|
import numpy as np
|
@@ -1148,14 +1171,14 @@ class VotingRegressor(BaseTransformer):
|
|
1148
1171
|
api_calls=[Session.call],
|
1149
1172
|
custom_tags=dict([("autogen", True)]),
|
1150
1173
|
)
|
1151
|
-
score =
|
1152
|
-
|
1174
|
+
score = score_wrapper_sproc(
|
1175
|
+
session,
|
1153
1176
|
query,
|
1154
1177
|
stage_score_file_name,
|
1155
1178
|
identifier.get_unescaped_names(self.input_cols),
|
1156
1179
|
identifier.get_unescaped_names(self.label_cols),
|
1157
1180
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1158
|
-
statement_params
|
1181
|
+
statement_params,
|
1159
1182
|
)
|
1160
1183
|
|
1161
1184
|
cleanup_temp_files([local_score_file_name])
|
@@ -1173,18 +1196,20 @@ class VotingRegressor(BaseTransformer):
|
|
1173
1196
|
if self._sklearn_object._estimator_type == 'classifier':
|
1174
1197
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1175
1198
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1176
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
|
1199
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1200
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1177
1201
|
# For regressor, the type of predict is float64
|
1178
1202
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1179
1203
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1180
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
|
1181
|
-
|
1204
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1205
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1182
1206
|
for prob_func in PROB_FUNCTIONS:
|
1183
1207
|
if hasattr(self, prob_func):
|
1184
1208
|
output_cols_prefix: str = f"{prob_func}_"
|
1185
1209
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1186
1210
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1187
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
|
1211
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1212
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1188
1213
|
|
1189
1214
|
@property
|
1190
1215
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|