snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -348,7 +350,6 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
348
350
|
sample_weight_col: Optional[str] = None,
|
349
351
|
) -> None:
|
350
352
|
super().__init__()
|
351
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
352
353
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
353
354
|
|
354
355
|
self._deps = list(deps)
|
@@ -385,6 +386,15 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
385
386
|
self.set_drop_input_cols(drop_input_cols)
|
386
387
|
self.set_sample_weight_col(sample_weight_col)
|
387
388
|
|
389
|
+
def _get_rand_id(self) -> str:
|
390
|
+
"""
|
391
|
+
Generate random id to be used in sproc and stage names.
|
392
|
+
|
393
|
+
Returns:
|
394
|
+
Random id string usable in sproc, table, and stage names.
|
395
|
+
"""
|
396
|
+
return str(uuid4()).replace("-", "_").upper()
|
397
|
+
|
388
398
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
389
399
|
"""
|
390
400
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -463,7 +473,7 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
463
473
|
cp.dump(self._sklearn_object, local_transform_file)
|
464
474
|
|
465
475
|
# Create temp stage to run fit.
|
466
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
476
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
467
477
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
468
478
|
SqlResultValidator(
|
469
479
|
session=session,
|
@@ -476,11 +486,12 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
476
486
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
477
487
|
).validate()
|
478
488
|
|
479
|
-
|
489
|
+
# Use posixpath to construct stage paths
|
490
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
491
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
480
492
|
local_result_file_name = get_temp_file_path()
|
481
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
482
493
|
|
483
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
494
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
484
495
|
statement_params = telemetry.get_function_usage_statement_params(
|
485
496
|
project=_PROJECT,
|
486
497
|
subproject=_SUBPROJECT,
|
@@ -506,6 +517,7 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
506
517
|
replace=True,
|
507
518
|
session=session,
|
508
519
|
statement_params=statement_params,
|
520
|
+
anonymous=True
|
509
521
|
)
|
510
522
|
def fit_wrapper_sproc(
|
511
523
|
session: Session,
|
@@ -514,7 +526,8 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
514
526
|
stage_result_file_name: str,
|
515
527
|
input_cols: List[str],
|
516
528
|
label_cols: List[str],
|
517
|
-
sample_weight_col: Optional[str]
|
529
|
+
sample_weight_col: Optional[str],
|
530
|
+
statement_params: Dict[str, str]
|
518
531
|
) -> str:
|
519
532
|
import cloudpickle as cp
|
520
533
|
import numpy as np
|
@@ -581,15 +594,15 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
581
594
|
api_calls=[Session.call],
|
582
595
|
custom_tags=dict([("autogen", True)]),
|
583
596
|
)
|
584
|
-
sproc_export_file_name =
|
585
|
-
|
597
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
598
|
+
session,
|
586
599
|
query,
|
587
600
|
stage_transform_file_name,
|
588
601
|
stage_result_file_name,
|
589
602
|
identifier.get_unescaped_names(self.input_cols),
|
590
603
|
identifier.get_unescaped_names(self.label_cols),
|
591
604
|
identifier.get_unescaped_names(self.sample_weight_col),
|
592
|
-
statement_params
|
605
|
+
statement_params,
|
593
606
|
)
|
594
607
|
|
595
608
|
if "|" in sproc_export_file_name:
|
@@ -599,7 +612,7 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
599
612
|
print("\n".join(fields[1:]))
|
600
613
|
|
601
614
|
session.file.get(
|
602
|
-
|
615
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
603
616
|
local_result_file_name,
|
604
617
|
statement_params=statement_params
|
605
618
|
)
|
@@ -645,7 +658,7 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
645
658
|
|
646
659
|
# Register vectorized UDF for batch inference
|
647
660
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
648
|
-
safe_id=self.id, method=inference_method)
|
661
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
649
662
|
|
650
663
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
651
664
|
# will try to pickle all of self which fails.
|
@@ -737,7 +750,7 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
737
750
|
return transformed_pandas_df.to_dict("records")
|
738
751
|
|
739
752
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
740
|
-
safe_id=self.id
|
753
|
+
safe_id=self._get_rand_id()
|
741
754
|
)
|
742
755
|
|
743
756
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -904,11 +917,18 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
904
917
|
Transformed dataset.
|
905
918
|
"""
|
906
919
|
if isinstance(dataset, DataFrame):
|
920
|
+
expected_type_inferred = ""
|
921
|
+
# when it is classifier, infer the datatype from label columns
|
922
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
923
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
924
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
925
|
+
)
|
926
|
+
|
907
927
|
output_df = self._batch_inference(
|
908
928
|
dataset=dataset,
|
909
929
|
inference_method="predict",
|
910
930
|
expected_output_cols_list=self.output_cols,
|
911
|
-
expected_output_cols_type=
|
931
|
+
expected_output_cols_type=expected_type_inferred,
|
912
932
|
)
|
913
933
|
elif isinstance(dataset, pd.DataFrame):
|
914
934
|
output_df = self._sklearn_inference(
|
@@ -979,10 +999,10 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
979
999
|
|
980
1000
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
981
1001
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
982
|
-
Returns
|
1002
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
983
1003
|
"""
|
984
1004
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
985
|
-
return []
|
1005
|
+
return [output_cols_prefix]
|
986
1006
|
|
987
1007
|
classes = self._sklearn_object.classes_
|
988
1008
|
if isinstance(classes, numpy.ndarray):
|
@@ -1211,7 +1231,7 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
1211
1231
|
cp.dump(self._sklearn_object, local_score_file)
|
1212
1232
|
|
1213
1233
|
# Create temp stage to run score.
|
1214
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1234
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1215
1235
|
session = dataset._session
|
1216
1236
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1217
1237
|
SqlResultValidator(
|
@@ -1225,8 +1245,9 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
1225
1245
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1226
1246
|
).validate()
|
1227
1247
|
|
1228
|
-
|
1229
|
-
|
1248
|
+
# Use posixpath to construct stage paths
|
1249
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1250
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1230
1251
|
statement_params = telemetry.get_function_usage_statement_params(
|
1231
1252
|
project=_PROJECT,
|
1232
1253
|
subproject=_SUBPROJECT,
|
@@ -1252,6 +1273,7 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
1252
1273
|
replace=True,
|
1253
1274
|
session=session,
|
1254
1275
|
statement_params=statement_params,
|
1276
|
+
anonymous=True
|
1255
1277
|
)
|
1256
1278
|
def score_wrapper_sproc(
|
1257
1279
|
session: Session,
|
@@ -1259,7 +1281,8 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
1259
1281
|
stage_score_file_name: str,
|
1260
1282
|
input_cols: List[str],
|
1261
1283
|
label_cols: List[str],
|
1262
|
-
sample_weight_col: Optional[str]
|
1284
|
+
sample_weight_col: Optional[str],
|
1285
|
+
statement_params: Dict[str, str]
|
1263
1286
|
) -> float:
|
1264
1287
|
import cloudpickle as cp
|
1265
1288
|
import numpy as np
|
@@ -1309,14 +1332,14 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
1309
1332
|
api_calls=[Session.call],
|
1310
1333
|
custom_tags=dict([("autogen", True)]),
|
1311
1334
|
)
|
1312
|
-
score =
|
1313
|
-
|
1335
|
+
score = score_wrapper_sproc(
|
1336
|
+
session,
|
1314
1337
|
query,
|
1315
1338
|
stage_score_file_name,
|
1316
1339
|
identifier.get_unescaped_names(self.input_cols),
|
1317
1340
|
identifier.get_unescaped_names(self.label_cols),
|
1318
1341
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1319
|
-
statement_params
|
1342
|
+
statement_params,
|
1320
1343
|
)
|
1321
1344
|
|
1322
1345
|
cleanup_temp_files([local_score_file_name])
|
@@ -1334,18 +1357,20 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
1334
1357
|
if self._sklearn_object._estimator_type == 'classifier':
|
1335
1358
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1336
1359
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1337
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1360
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1361
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1338
1362
|
# For regressor, the type of predict is float64
|
1339
1363
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1340
1364
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1341
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1342
|
-
|
1365
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1366
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1343
1367
|
for prob_func in PROB_FUNCTIONS:
|
1344
1368
|
if hasattr(self, prob_func):
|
1345
1369
|
output_cols_prefix: str = f"{prob_func}_"
|
1346
1370
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1347
1371
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1348
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1372
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1373
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1349
1374
|
|
1350
1375
|
@property
|
1351
1376
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -328,7 +330,6 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
328
330
|
sample_weight_col: Optional[str] = None,
|
329
331
|
) -> None:
|
330
332
|
super().__init__()
|
331
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
332
333
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
333
334
|
|
334
335
|
self._deps = list(deps)
|
@@ -364,6 +365,15 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
364
365
|
self.set_drop_input_cols(drop_input_cols)
|
365
366
|
self.set_sample_weight_col(sample_weight_col)
|
366
367
|
|
368
|
+
def _get_rand_id(self) -> str:
|
369
|
+
"""
|
370
|
+
Generate random id to be used in sproc and stage names.
|
371
|
+
|
372
|
+
Returns:
|
373
|
+
Random id string usable in sproc, table, and stage names.
|
374
|
+
"""
|
375
|
+
return str(uuid4()).replace("-", "_").upper()
|
376
|
+
|
367
377
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
368
378
|
"""
|
369
379
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -442,7 +452,7 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
442
452
|
cp.dump(self._sklearn_object, local_transform_file)
|
443
453
|
|
444
454
|
# Create temp stage to run fit.
|
445
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
455
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
446
456
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
447
457
|
SqlResultValidator(
|
448
458
|
session=session,
|
@@ -455,11 +465,12 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
455
465
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
456
466
|
).validate()
|
457
467
|
|
458
|
-
|
468
|
+
# Use posixpath to construct stage paths
|
469
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
470
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
459
471
|
local_result_file_name = get_temp_file_path()
|
460
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
461
472
|
|
462
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
473
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
463
474
|
statement_params = telemetry.get_function_usage_statement_params(
|
464
475
|
project=_PROJECT,
|
465
476
|
subproject=_SUBPROJECT,
|
@@ -485,6 +496,7 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
485
496
|
replace=True,
|
486
497
|
session=session,
|
487
498
|
statement_params=statement_params,
|
499
|
+
anonymous=True
|
488
500
|
)
|
489
501
|
def fit_wrapper_sproc(
|
490
502
|
session: Session,
|
@@ -493,7 +505,8 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
493
505
|
stage_result_file_name: str,
|
494
506
|
input_cols: List[str],
|
495
507
|
label_cols: List[str],
|
496
|
-
sample_weight_col: Optional[str]
|
508
|
+
sample_weight_col: Optional[str],
|
509
|
+
statement_params: Dict[str, str]
|
497
510
|
) -> str:
|
498
511
|
import cloudpickle as cp
|
499
512
|
import numpy as np
|
@@ -560,15 +573,15 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
560
573
|
api_calls=[Session.call],
|
561
574
|
custom_tags=dict([("autogen", True)]),
|
562
575
|
)
|
563
|
-
sproc_export_file_name =
|
564
|
-
|
576
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
577
|
+
session,
|
565
578
|
query,
|
566
579
|
stage_transform_file_name,
|
567
580
|
stage_result_file_name,
|
568
581
|
identifier.get_unescaped_names(self.input_cols),
|
569
582
|
identifier.get_unescaped_names(self.label_cols),
|
570
583
|
identifier.get_unescaped_names(self.sample_weight_col),
|
571
|
-
statement_params
|
584
|
+
statement_params,
|
572
585
|
)
|
573
586
|
|
574
587
|
if "|" in sproc_export_file_name:
|
@@ -578,7 +591,7 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
578
591
|
print("\n".join(fields[1:]))
|
579
592
|
|
580
593
|
session.file.get(
|
581
|
-
|
594
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
582
595
|
local_result_file_name,
|
583
596
|
statement_params=statement_params
|
584
597
|
)
|
@@ -624,7 +637,7 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
624
637
|
|
625
638
|
# Register vectorized UDF for batch inference
|
626
639
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
627
|
-
safe_id=self.id, method=inference_method)
|
640
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
628
641
|
|
629
642
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
630
643
|
# will try to pickle all of self which fails.
|
@@ -716,7 +729,7 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
716
729
|
return transformed_pandas_df.to_dict("records")
|
717
730
|
|
718
731
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
719
|
-
safe_id=self.id
|
732
|
+
safe_id=self._get_rand_id()
|
720
733
|
)
|
721
734
|
|
722
735
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -883,11 +896,18 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
883
896
|
Transformed dataset.
|
884
897
|
"""
|
885
898
|
if isinstance(dataset, DataFrame):
|
899
|
+
expected_type_inferred = "float"
|
900
|
+
# when it is classifier, infer the datatype from label columns
|
901
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
902
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
903
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
904
|
+
)
|
905
|
+
|
886
906
|
output_df = self._batch_inference(
|
887
907
|
dataset=dataset,
|
888
908
|
inference_method="predict",
|
889
909
|
expected_output_cols_list=self.output_cols,
|
890
|
-
expected_output_cols_type=
|
910
|
+
expected_output_cols_type=expected_type_inferred,
|
891
911
|
)
|
892
912
|
elif isinstance(dataset, pd.DataFrame):
|
893
913
|
output_df = self._sklearn_inference(
|
@@ -958,10 +978,10 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
958
978
|
|
959
979
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
960
980
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
961
|
-
Returns
|
981
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
962
982
|
"""
|
963
983
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
964
|
-
return []
|
984
|
+
return [output_cols_prefix]
|
965
985
|
|
966
986
|
classes = self._sklearn_object.classes_
|
967
987
|
if isinstance(classes, numpy.ndarray):
|
@@ -1186,7 +1206,7 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
1186
1206
|
cp.dump(self._sklearn_object, local_score_file)
|
1187
1207
|
|
1188
1208
|
# Create temp stage to run score.
|
1189
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1209
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1190
1210
|
session = dataset._session
|
1191
1211
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1192
1212
|
SqlResultValidator(
|
@@ -1200,8 +1220,9 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
1200
1220
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1201
1221
|
).validate()
|
1202
1222
|
|
1203
|
-
|
1204
|
-
|
1223
|
+
# Use posixpath to construct stage paths
|
1224
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1225
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1205
1226
|
statement_params = telemetry.get_function_usage_statement_params(
|
1206
1227
|
project=_PROJECT,
|
1207
1228
|
subproject=_SUBPROJECT,
|
@@ -1227,6 +1248,7 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
1227
1248
|
replace=True,
|
1228
1249
|
session=session,
|
1229
1250
|
statement_params=statement_params,
|
1251
|
+
anonymous=True
|
1230
1252
|
)
|
1231
1253
|
def score_wrapper_sproc(
|
1232
1254
|
session: Session,
|
@@ -1234,7 +1256,8 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
1234
1256
|
stage_score_file_name: str,
|
1235
1257
|
input_cols: List[str],
|
1236
1258
|
label_cols: List[str],
|
1237
|
-
sample_weight_col: Optional[str]
|
1259
|
+
sample_weight_col: Optional[str],
|
1260
|
+
statement_params: Dict[str, str]
|
1238
1261
|
) -> float:
|
1239
1262
|
import cloudpickle as cp
|
1240
1263
|
import numpy as np
|
@@ -1284,14 +1307,14 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
1284
1307
|
api_calls=[Session.call],
|
1285
1308
|
custom_tags=dict([("autogen", True)]),
|
1286
1309
|
)
|
1287
|
-
score =
|
1288
|
-
|
1310
|
+
score = score_wrapper_sproc(
|
1311
|
+
session,
|
1289
1312
|
query,
|
1290
1313
|
stage_score_file_name,
|
1291
1314
|
identifier.get_unescaped_names(self.input_cols),
|
1292
1315
|
identifier.get_unescaped_names(self.label_cols),
|
1293
1316
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1294
|
-
statement_params
|
1317
|
+
statement_params,
|
1295
1318
|
)
|
1296
1319
|
|
1297
1320
|
cleanup_temp_files([local_score_file_name])
|
@@ -1309,18 +1332,20 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
1309
1332
|
if self._sklearn_object._estimator_type == 'classifier':
|
1310
1333
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1311
1334
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1312
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1335
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1336
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1313
1337
|
# For regressor, the type of predict is float64
|
1314
1338
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1315
1339
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1316
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1317
|
-
|
1340
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1341
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1318
1342
|
for prob_func in PROB_FUNCTIONS:
|
1319
1343
|
if hasattr(self, prob_func):
|
1320
1344
|
output_cols_prefix: str = f"{prob_func}_"
|
1321
1345
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1322
1346
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1323
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1347
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1348
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1324
1349
|
|
1325
1350
|
@property
|
1326
1351
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|