snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -290,7 +292,6 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
290
292
|
sample_weight_col: Optional[str] = None,
|
291
293
|
) -> None:
|
292
294
|
super().__init__()
|
293
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
294
295
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
295
296
|
|
296
297
|
self._deps = list(deps)
|
@@ -320,6 +321,15 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
320
321
|
self.set_drop_input_cols(drop_input_cols)
|
321
322
|
self.set_sample_weight_col(sample_weight_col)
|
322
323
|
|
324
|
+
def _get_rand_id(self) -> str:
|
325
|
+
"""
|
326
|
+
Generate random id to be used in sproc and stage names.
|
327
|
+
|
328
|
+
Returns:
|
329
|
+
Random id string usable in sproc, table, and stage names.
|
330
|
+
"""
|
331
|
+
return str(uuid4()).replace("-", "_").upper()
|
332
|
+
|
323
333
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
324
334
|
"""
|
325
335
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -398,7 +408,7 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
398
408
|
cp.dump(self._sklearn_object, local_transform_file)
|
399
409
|
|
400
410
|
# Create temp stage to run fit.
|
401
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
411
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
402
412
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
403
413
|
SqlResultValidator(
|
404
414
|
session=session,
|
@@ -411,11 +421,12 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
411
421
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
412
422
|
).validate()
|
413
423
|
|
414
|
-
|
424
|
+
# Use posixpath to construct stage paths
|
425
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
426
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
415
427
|
local_result_file_name = get_temp_file_path()
|
416
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
417
428
|
|
418
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
429
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
419
430
|
statement_params = telemetry.get_function_usage_statement_params(
|
420
431
|
project=_PROJECT,
|
421
432
|
subproject=_SUBPROJECT,
|
@@ -441,6 +452,7 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
441
452
|
replace=True,
|
442
453
|
session=session,
|
443
454
|
statement_params=statement_params,
|
455
|
+
anonymous=True
|
444
456
|
)
|
445
457
|
def fit_wrapper_sproc(
|
446
458
|
session: Session,
|
@@ -449,7 +461,8 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
449
461
|
stage_result_file_name: str,
|
450
462
|
input_cols: List[str],
|
451
463
|
label_cols: List[str],
|
452
|
-
sample_weight_col: Optional[str]
|
464
|
+
sample_weight_col: Optional[str],
|
465
|
+
statement_params: Dict[str, str]
|
453
466
|
) -> str:
|
454
467
|
import cloudpickle as cp
|
455
468
|
import numpy as np
|
@@ -516,15 +529,15 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
516
529
|
api_calls=[Session.call],
|
517
530
|
custom_tags=dict([("autogen", True)]),
|
518
531
|
)
|
519
|
-
sproc_export_file_name =
|
520
|
-
|
532
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
533
|
+
session,
|
521
534
|
query,
|
522
535
|
stage_transform_file_name,
|
523
536
|
stage_result_file_name,
|
524
537
|
identifier.get_unescaped_names(self.input_cols),
|
525
538
|
identifier.get_unescaped_names(self.label_cols),
|
526
539
|
identifier.get_unescaped_names(self.sample_weight_col),
|
527
|
-
statement_params
|
540
|
+
statement_params,
|
528
541
|
)
|
529
542
|
|
530
543
|
if "|" in sproc_export_file_name:
|
@@ -534,7 +547,7 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
534
547
|
print("\n".join(fields[1:]))
|
535
548
|
|
536
549
|
session.file.get(
|
537
|
-
|
550
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
538
551
|
local_result_file_name,
|
539
552
|
statement_params=statement_params
|
540
553
|
)
|
@@ -580,7 +593,7 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
580
593
|
|
581
594
|
# Register vectorized UDF for batch inference
|
582
595
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
583
|
-
safe_id=self.id, method=inference_method)
|
596
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
584
597
|
|
585
598
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
586
599
|
# will try to pickle all of self which fails.
|
@@ -672,7 +685,7 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
672
685
|
return transformed_pandas_df.to_dict("records")
|
673
686
|
|
674
687
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
675
|
-
safe_id=self.id
|
688
|
+
safe_id=self._get_rand_id()
|
676
689
|
)
|
677
690
|
|
678
691
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -839,11 +852,18 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
839
852
|
Transformed dataset.
|
840
853
|
"""
|
841
854
|
if isinstance(dataset, DataFrame):
|
855
|
+
expected_type_inferred = "float"
|
856
|
+
# when it is classifier, infer the datatype from label columns
|
857
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
858
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
859
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
860
|
+
)
|
861
|
+
|
842
862
|
output_df = self._batch_inference(
|
843
863
|
dataset=dataset,
|
844
864
|
inference_method="predict",
|
845
865
|
expected_output_cols_list=self.output_cols,
|
846
|
-
expected_output_cols_type=
|
866
|
+
expected_output_cols_type=expected_type_inferred,
|
847
867
|
)
|
848
868
|
elif isinstance(dataset, pd.DataFrame):
|
849
869
|
output_df = self._sklearn_inference(
|
@@ -914,10 +934,10 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
914
934
|
|
915
935
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
916
936
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
917
|
-
Returns
|
937
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
918
938
|
"""
|
919
939
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
920
|
-
return []
|
940
|
+
return [output_cols_prefix]
|
921
941
|
|
922
942
|
classes = self._sklearn_object.classes_
|
923
943
|
if isinstance(classes, numpy.ndarray):
|
@@ -1142,7 +1162,7 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
1142
1162
|
cp.dump(self._sklearn_object, local_score_file)
|
1143
1163
|
|
1144
1164
|
# Create temp stage to run score.
|
1145
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1165
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1146
1166
|
session = dataset._session
|
1147
1167
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1148
1168
|
SqlResultValidator(
|
@@ -1156,8 +1176,9 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
1156
1176
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1157
1177
|
).validate()
|
1158
1178
|
|
1159
|
-
|
1160
|
-
|
1179
|
+
# Use posixpath to construct stage paths
|
1180
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1181
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1161
1182
|
statement_params = telemetry.get_function_usage_statement_params(
|
1162
1183
|
project=_PROJECT,
|
1163
1184
|
subproject=_SUBPROJECT,
|
@@ -1183,6 +1204,7 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
1183
1204
|
replace=True,
|
1184
1205
|
session=session,
|
1185
1206
|
statement_params=statement_params,
|
1207
|
+
anonymous=True
|
1186
1208
|
)
|
1187
1209
|
def score_wrapper_sproc(
|
1188
1210
|
session: Session,
|
@@ -1190,7 +1212,8 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
1190
1212
|
stage_score_file_name: str,
|
1191
1213
|
input_cols: List[str],
|
1192
1214
|
label_cols: List[str],
|
1193
|
-
sample_weight_col: Optional[str]
|
1215
|
+
sample_weight_col: Optional[str],
|
1216
|
+
statement_params: Dict[str, str]
|
1194
1217
|
) -> float:
|
1195
1218
|
import cloudpickle as cp
|
1196
1219
|
import numpy as np
|
@@ -1240,14 +1263,14 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
1240
1263
|
api_calls=[Session.call],
|
1241
1264
|
custom_tags=dict([("autogen", True)]),
|
1242
1265
|
)
|
1243
|
-
score =
|
1244
|
-
|
1266
|
+
score = score_wrapper_sproc(
|
1267
|
+
session,
|
1245
1268
|
query,
|
1246
1269
|
stage_score_file_name,
|
1247
1270
|
identifier.get_unescaped_names(self.input_cols),
|
1248
1271
|
identifier.get_unescaped_names(self.label_cols),
|
1249
1272
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1250
|
-
statement_params
|
1273
|
+
statement_params,
|
1251
1274
|
)
|
1252
1275
|
|
1253
1276
|
cleanup_temp_files([local_score_file_name])
|
@@ -1265,18 +1288,20 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
1265
1288
|
if self._sklearn_object._estimator_type == 'classifier':
|
1266
1289
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1267
1290
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1268
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1291
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1292
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1269
1293
|
# For regressor, the type of predict is float64
|
1270
1294
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1271
1295
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1272
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1273
|
-
|
1296
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1297
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1274
1298
|
for prob_func in PROB_FUNCTIONS:
|
1275
1299
|
if hasattr(self, prob_func):
|
1276
1300
|
output_cols_prefix: str = f"{prob_func}_"
|
1277
1301
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1278
1302
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1279
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1303
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1304
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1280
1305
|
|
1281
1306
|
@property
|
1282
1307
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -299,7 +301,6 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
299
301
|
sample_weight_col: Optional[str] = None,
|
300
302
|
) -> None:
|
301
303
|
super().__init__()
|
302
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
303
304
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
304
305
|
|
305
306
|
self._deps = list(deps)
|
@@ -330,6 +331,15 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
330
331
|
self.set_drop_input_cols(drop_input_cols)
|
331
332
|
self.set_sample_weight_col(sample_weight_col)
|
332
333
|
|
334
|
+
def _get_rand_id(self) -> str:
|
335
|
+
"""
|
336
|
+
Generate random id to be used in sproc and stage names.
|
337
|
+
|
338
|
+
Returns:
|
339
|
+
Random id string usable in sproc, table, and stage names.
|
340
|
+
"""
|
341
|
+
return str(uuid4()).replace("-", "_").upper()
|
342
|
+
|
333
343
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
334
344
|
"""
|
335
345
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -408,7 +418,7 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
408
418
|
cp.dump(self._sklearn_object, local_transform_file)
|
409
419
|
|
410
420
|
# Create temp stage to run fit.
|
411
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
421
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
412
422
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
413
423
|
SqlResultValidator(
|
414
424
|
session=session,
|
@@ -421,11 +431,12 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
421
431
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
422
432
|
).validate()
|
423
433
|
|
424
|
-
|
434
|
+
# Use posixpath to construct stage paths
|
435
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
436
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
425
437
|
local_result_file_name = get_temp_file_path()
|
426
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
427
438
|
|
428
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
439
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
429
440
|
statement_params = telemetry.get_function_usage_statement_params(
|
430
441
|
project=_PROJECT,
|
431
442
|
subproject=_SUBPROJECT,
|
@@ -451,6 +462,7 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
451
462
|
replace=True,
|
452
463
|
session=session,
|
453
464
|
statement_params=statement_params,
|
465
|
+
anonymous=True
|
454
466
|
)
|
455
467
|
def fit_wrapper_sproc(
|
456
468
|
session: Session,
|
@@ -459,7 +471,8 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
459
471
|
stage_result_file_name: str,
|
460
472
|
input_cols: List[str],
|
461
473
|
label_cols: List[str],
|
462
|
-
sample_weight_col: Optional[str]
|
474
|
+
sample_weight_col: Optional[str],
|
475
|
+
statement_params: Dict[str, str]
|
463
476
|
) -> str:
|
464
477
|
import cloudpickle as cp
|
465
478
|
import numpy as np
|
@@ -526,15 +539,15 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
526
539
|
api_calls=[Session.call],
|
527
540
|
custom_tags=dict([("autogen", True)]),
|
528
541
|
)
|
529
|
-
sproc_export_file_name =
|
530
|
-
|
542
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
543
|
+
session,
|
531
544
|
query,
|
532
545
|
stage_transform_file_name,
|
533
546
|
stage_result_file_name,
|
534
547
|
identifier.get_unescaped_names(self.input_cols),
|
535
548
|
identifier.get_unescaped_names(self.label_cols),
|
536
549
|
identifier.get_unescaped_names(self.sample_weight_col),
|
537
|
-
statement_params
|
550
|
+
statement_params,
|
538
551
|
)
|
539
552
|
|
540
553
|
if "|" in sproc_export_file_name:
|
@@ -544,7 +557,7 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
544
557
|
print("\n".join(fields[1:]))
|
545
558
|
|
546
559
|
session.file.get(
|
547
|
-
|
560
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
548
561
|
local_result_file_name,
|
549
562
|
statement_params=statement_params
|
550
563
|
)
|
@@ -590,7 +603,7 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
590
603
|
|
591
604
|
# Register vectorized UDF for batch inference
|
592
605
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
593
|
-
safe_id=self.id, method=inference_method)
|
606
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
594
607
|
|
595
608
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
596
609
|
# will try to pickle all of self which fails.
|
@@ -682,7 +695,7 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
682
695
|
return transformed_pandas_df.to_dict("records")
|
683
696
|
|
684
697
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
685
|
-
safe_id=self.id
|
698
|
+
safe_id=self._get_rand_id()
|
686
699
|
)
|
687
700
|
|
688
701
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -849,11 +862,18 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
849
862
|
Transformed dataset.
|
850
863
|
"""
|
851
864
|
if isinstance(dataset, DataFrame):
|
865
|
+
expected_type_inferred = ""
|
866
|
+
# when it is classifier, infer the datatype from label columns
|
867
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
868
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
869
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
870
|
+
)
|
871
|
+
|
852
872
|
output_df = self._batch_inference(
|
853
873
|
dataset=dataset,
|
854
874
|
inference_method="predict",
|
855
875
|
expected_output_cols_list=self.output_cols,
|
856
|
-
expected_output_cols_type=
|
876
|
+
expected_output_cols_type=expected_type_inferred,
|
857
877
|
)
|
858
878
|
elif isinstance(dataset, pd.DataFrame):
|
859
879
|
output_df = self._sklearn_inference(
|
@@ -924,10 +944,10 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
924
944
|
|
925
945
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
926
946
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
927
|
-
Returns
|
947
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
928
948
|
"""
|
929
949
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
930
|
-
return []
|
950
|
+
return [output_cols_prefix]
|
931
951
|
|
932
952
|
classes = self._sklearn_object.classes_
|
933
953
|
if isinstance(classes, numpy.ndarray):
|
@@ -1156,7 +1176,7 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
1156
1176
|
cp.dump(self._sklearn_object, local_score_file)
|
1157
1177
|
|
1158
1178
|
# Create temp stage to run score.
|
1159
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1179
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1160
1180
|
session = dataset._session
|
1161
1181
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1162
1182
|
SqlResultValidator(
|
@@ -1170,8 +1190,9 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
1170
1190
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1171
1191
|
).validate()
|
1172
1192
|
|
1173
|
-
|
1174
|
-
|
1193
|
+
# Use posixpath to construct stage paths
|
1194
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1195
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1175
1196
|
statement_params = telemetry.get_function_usage_statement_params(
|
1176
1197
|
project=_PROJECT,
|
1177
1198
|
subproject=_SUBPROJECT,
|
@@ -1197,6 +1218,7 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
1197
1218
|
replace=True,
|
1198
1219
|
session=session,
|
1199
1220
|
statement_params=statement_params,
|
1221
|
+
anonymous=True
|
1200
1222
|
)
|
1201
1223
|
def score_wrapper_sproc(
|
1202
1224
|
session: Session,
|
@@ -1204,7 +1226,8 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
1204
1226
|
stage_score_file_name: str,
|
1205
1227
|
input_cols: List[str],
|
1206
1228
|
label_cols: List[str],
|
1207
|
-
sample_weight_col: Optional[str]
|
1229
|
+
sample_weight_col: Optional[str],
|
1230
|
+
statement_params: Dict[str, str]
|
1208
1231
|
) -> float:
|
1209
1232
|
import cloudpickle as cp
|
1210
1233
|
import numpy as np
|
@@ -1254,14 +1277,14 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
1254
1277
|
api_calls=[Session.call],
|
1255
1278
|
custom_tags=dict([("autogen", True)]),
|
1256
1279
|
)
|
1257
|
-
score =
|
1258
|
-
|
1280
|
+
score = score_wrapper_sproc(
|
1281
|
+
session,
|
1259
1282
|
query,
|
1260
1283
|
stage_score_file_name,
|
1261
1284
|
identifier.get_unescaped_names(self.input_cols),
|
1262
1285
|
identifier.get_unescaped_names(self.label_cols),
|
1263
1286
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1264
|
-
statement_params
|
1287
|
+
statement_params,
|
1265
1288
|
)
|
1266
1289
|
|
1267
1290
|
cleanup_temp_files([local_score_file_name])
|
@@ -1279,18 +1302,20 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
1279
1302
|
if self._sklearn_object._estimator_type == 'classifier':
|
1280
1303
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1281
1304
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1282
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1305
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1306
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1283
1307
|
# For regressor, the type of predict is float64
|
1284
1308
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1285
1309
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1286
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1287
|
-
|
1310
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1311
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1288
1312
|
for prob_func in PROB_FUNCTIONS:
|
1289
1313
|
if hasattr(self, prob_func):
|
1290
1314
|
output_cols_prefix: str = f"{prob_func}_"
|
1291
1315
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1292
1316
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1293
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1317
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1318
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1294
1319
|
|
1295
1320
|
@property
|
1296
1321
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|