snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -324,7 +326,6 @@ class RandomForestRegressor(BaseTransformer):
|
|
324
326
|
sample_weight_col: Optional[str] = None,
|
325
327
|
) -> None:
|
326
328
|
super().__init__()
|
327
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
328
329
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
329
330
|
|
330
331
|
self._deps = list(deps)
|
@@ -360,6 +361,15 @@ class RandomForestRegressor(BaseTransformer):
|
|
360
361
|
self.set_drop_input_cols(drop_input_cols)
|
361
362
|
self.set_sample_weight_col(sample_weight_col)
|
362
363
|
|
364
|
+
def _get_rand_id(self) -> str:
|
365
|
+
"""
|
366
|
+
Generate random id to be used in sproc and stage names.
|
367
|
+
|
368
|
+
Returns:
|
369
|
+
Random id string usable in sproc, table, and stage names.
|
370
|
+
"""
|
371
|
+
return str(uuid4()).replace("-", "_").upper()
|
372
|
+
|
363
373
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
364
374
|
"""
|
365
375
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -438,7 +448,7 @@ class RandomForestRegressor(BaseTransformer):
|
|
438
448
|
cp.dump(self._sklearn_object, local_transform_file)
|
439
449
|
|
440
450
|
# Create temp stage to run fit.
|
441
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
451
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
442
452
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
443
453
|
SqlResultValidator(
|
444
454
|
session=session,
|
@@ -451,11 +461,12 @@ class RandomForestRegressor(BaseTransformer):
|
|
451
461
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
452
462
|
).validate()
|
453
463
|
|
454
|
-
|
464
|
+
# Use posixpath to construct stage paths
|
465
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
466
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
455
467
|
local_result_file_name = get_temp_file_path()
|
456
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
457
468
|
|
458
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
469
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
459
470
|
statement_params = telemetry.get_function_usage_statement_params(
|
460
471
|
project=_PROJECT,
|
461
472
|
subproject=_SUBPROJECT,
|
@@ -481,6 +492,7 @@ class RandomForestRegressor(BaseTransformer):
|
|
481
492
|
replace=True,
|
482
493
|
session=session,
|
483
494
|
statement_params=statement_params,
|
495
|
+
anonymous=True
|
484
496
|
)
|
485
497
|
def fit_wrapper_sproc(
|
486
498
|
session: Session,
|
@@ -489,7 +501,8 @@ class RandomForestRegressor(BaseTransformer):
|
|
489
501
|
stage_result_file_name: str,
|
490
502
|
input_cols: List[str],
|
491
503
|
label_cols: List[str],
|
492
|
-
sample_weight_col: Optional[str]
|
504
|
+
sample_weight_col: Optional[str],
|
505
|
+
statement_params: Dict[str, str]
|
493
506
|
) -> str:
|
494
507
|
import cloudpickle as cp
|
495
508
|
import numpy as np
|
@@ -556,15 +569,15 @@ class RandomForestRegressor(BaseTransformer):
|
|
556
569
|
api_calls=[Session.call],
|
557
570
|
custom_tags=dict([("autogen", True)]),
|
558
571
|
)
|
559
|
-
sproc_export_file_name =
|
560
|
-
|
572
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
573
|
+
session,
|
561
574
|
query,
|
562
575
|
stage_transform_file_name,
|
563
576
|
stage_result_file_name,
|
564
577
|
identifier.get_unescaped_names(self.input_cols),
|
565
578
|
identifier.get_unescaped_names(self.label_cols),
|
566
579
|
identifier.get_unescaped_names(self.sample_weight_col),
|
567
|
-
statement_params
|
580
|
+
statement_params,
|
568
581
|
)
|
569
582
|
|
570
583
|
if "|" in sproc_export_file_name:
|
@@ -574,7 +587,7 @@ class RandomForestRegressor(BaseTransformer):
|
|
574
587
|
print("\n".join(fields[1:]))
|
575
588
|
|
576
589
|
session.file.get(
|
577
|
-
|
590
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
578
591
|
local_result_file_name,
|
579
592
|
statement_params=statement_params
|
580
593
|
)
|
@@ -620,7 +633,7 @@ class RandomForestRegressor(BaseTransformer):
|
|
620
633
|
|
621
634
|
# Register vectorized UDF for batch inference
|
622
635
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
623
|
-
safe_id=self.
|
636
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
624
637
|
|
625
638
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
626
639
|
# will try to pickle all of self which fails.
|
@@ -712,7 +725,7 @@ class RandomForestRegressor(BaseTransformer):
|
|
712
725
|
return transformed_pandas_df.to_dict("records")
|
713
726
|
|
714
727
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
715
|
-
safe_id=self.
|
728
|
+
safe_id=self._get_rand_id()
|
716
729
|
)
|
717
730
|
|
718
731
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -879,11 +892,18 @@ class RandomForestRegressor(BaseTransformer):
|
|
879
892
|
Transformed dataset.
|
880
893
|
"""
|
881
894
|
if isinstance(dataset, DataFrame):
|
895
|
+
expected_type_inferred = "float"
|
896
|
+
# when it is classifier, infer the datatype from label columns
|
897
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
898
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
899
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
900
|
+
)
|
901
|
+
|
882
902
|
output_df = self._batch_inference(
|
883
903
|
dataset=dataset,
|
884
904
|
inference_method="predict",
|
885
905
|
expected_output_cols_list=self.output_cols,
|
886
|
-
expected_output_cols_type=
|
906
|
+
expected_output_cols_type=expected_type_inferred,
|
887
907
|
)
|
888
908
|
elif isinstance(dataset, pd.DataFrame):
|
889
909
|
output_df = self._sklearn_inference(
|
@@ -954,10 +974,10 @@ class RandomForestRegressor(BaseTransformer):
|
|
954
974
|
|
955
975
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
956
976
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
957
|
-
Returns
|
977
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
958
978
|
"""
|
959
979
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
960
|
-
return []
|
980
|
+
return [output_cols_prefix]
|
961
981
|
|
962
982
|
classes = self._sklearn_object.classes_
|
963
983
|
if isinstance(classes, numpy.ndarray):
|
@@ -1182,7 +1202,7 @@ class RandomForestRegressor(BaseTransformer):
|
|
1182
1202
|
cp.dump(self._sklearn_object, local_score_file)
|
1183
1203
|
|
1184
1204
|
# Create temp stage to run score.
|
1185
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1205
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1186
1206
|
session = dataset._session
|
1187
1207
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1188
1208
|
SqlResultValidator(
|
@@ -1196,8 +1216,9 @@ class RandomForestRegressor(BaseTransformer):
|
|
1196
1216
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1197
1217
|
).validate()
|
1198
1218
|
|
1199
|
-
|
1200
|
-
|
1219
|
+
# Use posixpath to construct stage paths
|
1220
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1221
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1201
1222
|
statement_params = telemetry.get_function_usage_statement_params(
|
1202
1223
|
project=_PROJECT,
|
1203
1224
|
subproject=_SUBPROJECT,
|
@@ -1223,6 +1244,7 @@ class RandomForestRegressor(BaseTransformer):
|
|
1223
1244
|
replace=True,
|
1224
1245
|
session=session,
|
1225
1246
|
statement_params=statement_params,
|
1247
|
+
anonymous=True
|
1226
1248
|
)
|
1227
1249
|
def score_wrapper_sproc(
|
1228
1250
|
session: Session,
|
@@ -1230,7 +1252,8 @@ class RandomForestRegressor(BaseTransformer):
|
|
1230
1252
|
stage_score_file_name: str,
|
1231
1253
|
input_cols: List[str],
|
1232
1254
|
label_cols: List[str],
|
1233
|
-
sample_weight_col: Optional[str]
|
1255
|
+
sample_weight_col: Optional[str],
|
1256
|
+
statement_params: Dict[str, str]
|
1234
1257
|
) -> float:
|
1235
1258
|
import cloudpickle as cp
|
1236
1259
|
import numpy as np
|
@@ -1280,14 +1303,14 @@ class RandomForestRegressor(BaseTransformer):
|
|
1280
1303
|
api_calls=[Session.call],
|
1281
1304
|
custom_tags=dict([("autogen", True)]),
|
1282
1305
|
)
|
1283
|
-
score =
|
1284
|
-
|
1306
|
+
score = score_wrapper_sproc(
|
1307
|
+
session,
|
1285
1308
|
query,
|
1286
1309
|
stage_score_file_name,
|
1287
1310
|
identifier.get_unescaped_names(self.input_cols),
|
1288
1311
|
identifier.get_unescaped_names(self.label_cols),
|
1289
1312
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1290
|
-
statement_params
|
1313
|
+
statement_params,
|
1291
1314
|
)
|
1292
1315
|
|
1293
1316
|
cleanup_temp_files([local_score_file_name])
|
@@ -1305,18 +1328,20 @@ class RandomForestRegressor(BaseTransformer):
|
|
1305
1328
|
if self._sklearn_object._estimator_type == 'classifier':
|
1306
1329
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1307
1330
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1308
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1331
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1332
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1309
1333
|
# For regressor, the type of predict is float64
|
1310
1334
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1311
1335
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1312
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1313
|
-
|
1336
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1337
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1314
1338
|
for prob_func in PROB_FUNCTIONS:
|
1315
1339
|
if hasattr(self, prob_func):
|
1316
1340
|
output_cols_prefix: str = f"{prob_func}_"
|
1317
1341
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1318
1342
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1319
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1343
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1344
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1320
1345
|
|
1321
1346
|
@property
|
1322
1347
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -235,7 +237,6 @@ class StackingRegressor(BaseTransformer):
|
|
235
237
|
sample_weight_col: Optional[str] = None,
|
236
238
|
) -> None:
|
237
239
|
super().__init__()
|
238
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
239
240
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
240
241
|
deps = deps | _gather_dependencies(estimators)
|
241
242
|
deps = deps | _gather_dependencies(final_estimator)
|
@@ -262,6 +263,15 @@ class StackingRegressor(BaseTransformer):
|
|
262
263
|
self.set_drop_input_cols(drop_input_cols)
|
263
264
|
self.set_sample_weight_col(sample_weight_col)
|
264
265
|
|
266
|
+
def _get_rand_id(self) -> str:
|
267
|
+
"""
|
268
|
+
Generate random id to be used in sproc and stage names.
|
269
|
+
|
270
|
+
Returns:
|
271
|
+
Random id string usable in sproc, table, and stage names.
|
272
|
+
"""
|
273
|
+
return str(uuid4()).replace("-", "_").upper()
|
274
|
+
|
265
275
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
266
276
|
"""
|
267
277
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -340,7 +350,7 @@ class StackingRegressor(BaseTransformer):
|
|
340
350
|
cp.dump(self._sklearn_object, local_transform_file)
|
341
351
|
|
342
352
|
# Create temp stage to run fit.
|
343
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
353
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
344
354
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
345
355
|
SqlResultValidator(
|
346
356
|
session=session,
|
@@ -353,11 +363,12 @@ class StackingRegressor(BaseTransformer):
|
|
353
363
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
354
364
|
).validate()
|
355
365
|
|
356
|
-
|
366
|
+
# Use posixpath to construct stage paths
|
367
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
368
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
357
369
|
local_result_file_name = get_temp_file_path()
|
358
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
359
370
|
|
360
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
371
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
361
372
|
statement_params = telemetry.get_function_usage_statement_params(
|
362
373
|
project=_PROJECT,
|
363
374
|
subproject=_SUBPROJECT,
|
@@ -383,6 +394,7 @@ class StackingRegressor(BaseTransformer):
|
|
383
394
|
replace=True,
|
384
395
|
session=session,
|
385
396
|
statement_params=statement_params,
|
397
|
+
anonymous=True
|
386
398
|
)
|
387
399
|
def fit_wrapper_sproc(
|
388
400
|
session: Session,
|
@@ -391,7 +403,8 @@ class StackingRegressor(BaseTransformer):
|
|
391
403
|
stage_result_file_name: str,
|
392
404
|
input_cols: List[str],
|
393
405
|
label_cols: List[str],
|
394
|
-
sample_weight_col: Optional[str]
|
406
|
+
sample_weight_col: Optional[str],
|
407
|
+
statement_params: Dict[str, str]
|
395
408
|
) -> str:
|
396
409
|
import cloudpickle as cp
|
397
410
|
import numpy as np
|
@@ -458,15 +471,15 @@ class StackingRegressor(BaseTransformer):
|
|
458
471
|
api_calls=[Session.call],
|
459
472
|
custom_tags=dict([("autogen", True)]),
|
460
473
|
)
|
461
|
-
sproc_export_file_name =
|
462
|
-
|
474
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
475
|
+
session,
|
463
476
|
query,
|
464
477
|
stage_transform_file_name,
|
465
478
|
stage_result_file_name,
|
466
479
|
identifier.get_unescaped_names(self.input_cols),
|
467
480
|
identifier.get_unescaped_names(self.label_cols),
|
468
481
|
identifier.get_unescaped_names(self.sample_weight_col),
|
469
|
-
statement_params
|
482
|
+
statement_params,
|
470
483
|
)
|
471
484
|
|
472
485
|
if "|" in sproc_export_file_name:
|
@@ -476,7 +489,7 @@ class StackingRegressor(BaseTransformer):
|
|
476
489
|
print("\n".join(fields[1:]))
|
477
490
|
|
478
491
|
session.file.get(
|
479
|
-
|
492
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
480
493
|
local_result_file_name,
|
481
494
|
statement_params=statement_params
|
482
495
|
)
|
@@ -522,7 +535,7 @@ class StackingRegressor(BaseTransformer):
|
|
522
535
|
|
523
536
|
# Register vectorized UDF for batch inference
|
524
537
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
525
|
-
safe_id=self.
|
538
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
526
539
|
|
527
540
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
528
541
|
# will try to pickle all of self which fails.
|
@@ -614,7 +627,7 @@ class StackingRegressor(BaseTransformer):
|
|
614
627
|
return transformed_pandas_df.to_dict("records")
|
615
628
|
|
616
629
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
617
|
-
safe_id=self.
|
630
|
+
safe_id=self._get_rand_id()
|
618
631
|
)
|
619
632
|
|
620
633
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -781,11 +794,18 @@ class StackingRegressor(BaseTransformer):
|
|
781
794
|
Transformed dataset.
|
782
795
|
"""
|
783
796
|
if isinstance(dataset, DataFrame):
|
797
|
+
expected_type_inferred = "float"
|
798
|
+
# when it is classifier, infer the datatype from label columns
|
799
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
800
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
801
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
802
|
+
)
|
803
|
+
|
784
804
|
output_df = self._batch_inference(
|
785
805
|
dataset=dataset,
|
786
806
|
inference_method="predict",
|
787
807
|
expected_output_cols_list=self.output_cols,
|
788
|
-
expected_output_cols_type=
|
808
|
+
expected_output_cols_type=expected_type_inferred,
|
789
809
|
)
|
790
810
|
elif isinstance(dataset, pd.DataFrame):
|
791
811
|
output_df = self._sklearn_inference(
|
@@ -858,10 +878,10 @@ class StackingRegressor(BaseTransformer):
|
|
858
878
|
|
859
879
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
860
880
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
861
|
-
Returns
|
881
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
862
882
|
"""
|
863
883
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
864
|
-
return []
|
884
|
+
return [output_cols_prefix]
|
865
885
|
|
866
886
|
classes = self._sklearn_object.classes_
|
867
887
|
if isinstance(classes, numpy.ndarray):
|
@@ -1086,7 +1106,7 @@ class StackingRegressor(BaseTransformer):
|
|
1086
1106
|
cp.dump(self._sklearn_object, local_score_file)
|
1087
1107
|
|
1088
1108
|
# Create temp stage to run score.
|
1089
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1109
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1090
1110
|
session = dataset._session
|
1091
1111
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1092
1112
|
SqlResultValidator(
|
@@ -1100,8 +1120,9 @@ class StackingRegressor(BaseTransformer):
|
|
1100
1120
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1101
1121
|
).validate()
|
1102
1122
|
|
1103
|
-
|
1104
|
-
|
1123
|
+
# Use posixpath to construct stage paths
|
1124
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1125
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1105
1126
|
statement_params = telemetry.get_function_usage_statement_params(
|
1106
1127
|
project=_PROJECT,
|
1107
1128
|
subproject=_SUBPROJECT,
|
@@ -1127,6 +1148,7 @@ class StackingRegressor(BaseTransformer):
|
|
1127
1148
|
replace=True,
|
1128
1149
|
session=session,
|
1129
1150
|
statement_params=statement_params,
|
1151
|
+
anonymous=True
|
1130
1152
|
)
|
1131
1153
|
def score_wrapper_sproc(
|
1132
1154
|
session: Session,
|
@@ -1134,7 +1156,8 @@ class StackingRegressor(BaseTransformer):
|
|
1134
1156
|
stage_score_file_name: str,
|
1135
1157
|
input_cols: List[str],
|
1136
1158
|
label_cols: List[str],
|
1137
|
-
sample_weight_col: Optional[str]
|
1159
|
+
sample_weight_col: Optional[str],
|
1160
|
+
statement_params: Dict[str, str]
|
1138
1161
|
) -> float:
|
1139
1162
|
import cloudpickle as cp
|
1140
1163
|
import numpy as np
|
@@ -1184,14 +1207,14 @@ class StackingRegressor(BaseTransformer):
|
|
1184
1207
|
api_calls=[Session.call],
|
1185
1208
|
custom_tags=dict([("autogen", True)]),
|
1186
1209
|
)
|
1187
|
-
score =
|
1188
|
-
|
1210
|
+
score = score_wrapper_sproc(
|
1211
|
+
session,
|
1189
1212
|
query,
|
1190
1213
|
stage_score_file_name,
|
1191
1214
|
identifier.get_unescaped_names(self.input_cols),
|
1192
1215
|
identifier.get_unescaped_names(self.label_cols),
|
1193
1216
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1194
|
-
statement_params
|
1217
|
+
statement_params,
|
1195
1218
|
)
|
1196
1219
|
|
1197
1220
|
cleanup_temp_files([local_score_file_name])
|
@@ -1209,18 +1232,20 @@ class StackingRegressor(BaseTransformer):
|
|
1209
1232
|
if self._sklearn_object._estimator_type == 'classifier':
|
1210
1233
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1211
1234
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1212
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1235
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1236
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1213
1237
|
# For regressor, the type of predict is float64
|
1214
1238
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1215
1239
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1216
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1217
|
-
|
1240
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1241
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1218
1242
|
for prob_func in PROB_FUNCTIONS:
|
1219
1243
|
if hasattr(self, prob_func):
|
1220
1244
|
output_cols_prefix: str = f"{prob_func}_"
|
1221
1245
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1222
1246
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1223
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1247
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1248
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1224
1249
|
|
1225
1250
|
@property
|
1226
1251
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|