snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -253,7 +255,6 @@ class ColumnTransformer(BaseTransformer):
|
|
253
255
|
sample_weight_col: Optional[str] = None,
|
254
256
|
) -> None:
|
255
257
|
super().__init__()
|
256
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
257
258
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
258
259
|
deps = deps | _gather_dependencies(transformers)
|
259
260
|
self._deps = list(deps)
|
@@ -279,6 +280,15 @@ class ColumnTransformer(BaseTransformer):
|
|
279
280
|
self.set_drop_input_cols(drop_input_cols)
|
280
281
|
self.set_sample_weight_col(sample_weight_col)
|
281
282
|
|
283
|
+
def _get_rand_id(self) -> str:
|
284
|
+
"""
|
285
|
+
Generate random id to be used in sproc and stage names.
|
286
|
+
|
287
|
+
Returns:
|
288
|
+
Random id string usable in sproc, table, and stage names.
|
289
|
+
"""
|
290
|
+
return str(uuid4()).replace("-", "_").upper()
|
291
|
+
|
282
292
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
283
293
|
"""
|
284
294
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -357,7 +367,7 @@ class ColumnTransformer(BaseTransformer):
|
|
357
367
|
cp.dump(self._sklearn_object, local_transform_file)
|
358
368
|
|
359
369
|
# Create temp stage to run fit.
|
360
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
370
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
361
371
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
362
372
|
SqlResultValidator(
|
363
373
|
session=session,
|
@@ -370,11 +380,12 @@ class ColumnTransformer(BaseTransformer):
|
|
370
380
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
371
381
|
).validate()
|
372
382
|
|
373
|
-
|
383
|
+
# Use posixpath to construct stage paths
|
384
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
385
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
374
386
|
local_result_file_name = get_temp_file_path()
|
375
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
376
387
|
|
377
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
388
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
378
389
|
statement_params = telemetry.get_function_usage_statement_params(
|
379
390
|
project=_PROJECT,
|
380
391
|
subproject=_SUBPROJECT,
|
@@ -400,6 +411,7 @@ class ColumnTransformer(BaseTransformer):
|
|
400
411
|
replace=True,
|
401
412
|
session=session,
|
402
413
|
statement_params=statement_params,
|
414
|
+
anonymous=True
|
403
415
|
)
|
404
416
|
def fit_wrapper_sproc(
|
405
417
|
session: Session,
|
@@ -408,7 +420,8 @@ class ColumnTransformer(BaseTransformer):
|
|
408
420
|
stage_result_file_name: str,
|
409
421
|
input_cols: List[str],
|
410
422
|
label_cols: List[str],
|
411
|
-
sample_weight_col: Optional[str]
|
423
|
+
sample_weight_col: Optional[str],
|
424
|
+
statement_params: Dict[str, str]
|
412
425
|
) -> str:
|
413
426
|
import cloudpickle as cp
|
414
427
|
import numpy as np
|
@@ -475,15 +488,15 @@ class ColumnTransformer(BaseTransformer):
|
|
475
488
|
api_calls=[Session.call],
|
476
489
|
custom_tags=dict([("autogen", True)]),
|
477
490
|
)
|
478
|
-
sproc_export_file_name =
|
479
|
-
|
491
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
492
|
+
session,
|
480
493
|
query,
|
481
494
|
stage_transform_file_name,
|
482
495
|
stage_result_file_name,
|
483
496
|
identifier.get_unescaped_names(self.input_cols),
|
484
497
|
identifier.get_unescaped_names(self.label_cols),
|
485
498
|
identifier.get_unescaped_names(self.sample_weight_col),
|
486
|
-
statement_params
|
499
|
+
statement_params,
|
487
500
|
)
|
488
501
|
|
489
502
|
if "|" in sproc_export_file_name:
|
@@ -493,7 +506,7 @@ class ColumnTransformer(BaseTransformer):
|
|
493
506
|
print("\n".join(fields[1:]))
|
494
507
|
|
495
508
|
session.file.get(
|
496
|
-
|
509
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
497
510
|
local_result_file_name,
|
498
511
|
statement_params=statement_params
|
499
512
|
)
|
@@ -539,7 +552,7 @@ class ColumnTransformer(BaseTransformer):
|
|
539
552
|
|
540
553
|
# Register vectorized UDF for batch inference
|
541
554
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
542
|
-
safe_id=self.
|
555
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
543
556
|
|
544
557
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
545
558
|
# will try to pickle all of self which fails.
|
@@ -631,7 +644,7 @@ class ColumnTransformer(BaseTransformer):
|
|
631
644
|
return transformed_pandas_df.to_dict("records")
|
632
645
|
|
633
646
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
634
|
-
safe_id=self.
|
647
|
+
safe_id=self._get_rand_id()
|
635
648
|
)
|
636
649
|
|
637
650
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -796,11 +809,18 @@ class ColumnTransformer(BaseTransformer):
|
|
796
809
|
Transformed dataset.
|
797
810
|
"""
|
798
811
|
if isinstance(dataset, DataFrame):
|
812
|
+
expected_type_inferred = ""
|
813
|
+
# when it is classifier, infer the datatype from label columns
|
814
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
815
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
816
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
817
|
+
)
|
818
|
+
|
799
819
|
output_df = self._batch_inference(
|
800
820
|
dataset=dataset,
|
801
821
|
inference_method="predict",
|
802
822
|
expected_output_cols_list=self.output_cols,
|
803
|
-
expected_output_cols_type=
|
823
|
+
expected_output_cols_type=expected_type_inferred,
|
804
824
|
)
|
805
825
|
elif isinstance(dataset, pd.DataFrame):
|
806
826
|
output_df = self._sklearn_inference(
|
@@ -873,10 +893,10 @@ class ColumnTransformer(BaseTransformer):
|
|
873
893
|
|
874
894
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
875
895
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
876
|
-
Returns
|
896
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
877
897
|
"""
|
878
898
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
879
|
-
return []
|
899
|
+
return [output_cols_prefix]
|
880
900
|
|
881
901
|
classes = self._sklearn_object.classes_
|
882
902
|
if isinstance(classes, numpy.ndarray):
|
@@ -1101,7 +1121,7 @@ class ColumnTransformer(BaseTransformer):
|
|
1101
1121
|
cp.dump(self._sklearn_object, local_score_file)
|
1102
1122
|
|
1103
1123
|
# Create temp stage to run score.
|
1104
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1124
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1105
1125
|
session = dataset._session
|
1106
1126
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1107
1127
|
SqlResultValidator(
|
@@ -1115,8 +1135,9 @@ class ColumnTransformer(BaseTransformer):
|
|
1115
1135
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1116
1136
|
).validate()
|
1117
1137
|
|
1118
|
-
|
1119
|
-
|
1138
|
+
# Use posixpath to construct stage paths
|
1139
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1140
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1120
1141
|
statement_params = telemetry.get_function_usage_statement_params(
|
1121
1142
|
project=_PROJECT,
|
1122
1143
|
subproject=_SUBPROJECT,
|
@@ -1142,6 +1163,7 @@ class ColumnTransformer(BaseTransformer):
|
|
1142
1163
|
replace=True,
|
1143
1164
|
session=session,
|
1144
1165
|
statement_params=statement_params,
|
1166
|
+
anonymous=True
|
1145
1167
|
)
|
1146
1168
|
def score_wrapper_sproc(
|
1147
1169
|
session: Session,
|
@@ -1149,7 +1171,8 @@ class ColumnTransformer(BaseTransformer):
|
|
1149
1171
|
stage_score_file_name: str,
|
1150
1172
|
input_cols: List[str],
|
1151
1173
|
label_cols: List[str],
|
1152
|
-
sample_weight_col: Optional[str]
|
1174
|
+
sample_weight_col: Optional[str],
|
1175
|
+
statement_params: Dict[str, str]
|
1153
1176
|
) -> float:
|
1154
1177
|
import cloudpickle as cp
|
1155
1178
|
import numpy as np
|
@@ -1199,14 +1222,14 @@ class ColumnTransformer(BaseTransformer):
|
|
1199
1222
|
api_calls=[Session.call],
|
1200
1223
|
custom_tags=dict([("autogen", True)]),
|
1201
1224
|
)
|
1202
|
-
score =
|
1203
|
-
|
1225
|
+
score = score_wrapper_sproc(
|
1226
|
+
session,
|
1204
1227
|
query,
|
1205
1228
|
stage_score_file_name,
|
1206
1229
|
identifier.get_unescaped_names(self.input_cols),
|
1207
1230
|
identifier.get_unescaped_names(self.label_cols),
|
1208
1231
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1209
|
-
statement_params
|
1232
|
+
statement_params,
|
1210
1233
|
)
|
1211
1234
|
|
1212
1235
|
cleanup_temp_files([local_score_file_name])
|
@@ -1224,18 +1247,20 @@ class ColumnTransformer(BaseTransformer):
|
|
1224
1247
|
if self._sklearn_object._estimator_type == 'classifier':
|
1225
1248
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1226
1249
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1227
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1250
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1251
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1228
1252
|
# For regressor, the type of predict is float64
|
1229
1253
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1230
1254
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1231
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1232
|
-
|
1255
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1256
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1233
1257
|
for prob_func in PROB_FUNCTIONS:
|
1234
1258
|
if hasattr(self, prob_func):
|
1235
1259
|
output_cols_prefix: str = f"{prob_func}_"
|
1236
1260
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1237
1261
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1238
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1262
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1263
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1239
1264
|
|
1240
1265
|
@property
|
1241
1266
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -214,7 +216,6 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
214
216
|
sample_weight_col: Optional[str] = None,
|
215
217
|
) -> None:
|
216
218
|
super().__init__()
|
217
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
218
219
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
219
220
|
|
220
221
|
self._deps = list(deps)
|
@@ -238,6 +239,15 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
238
239
|
self.set_drop_input_cols(drop_input_cols)
|
239
240
|
self.set_sample_weight_col(sample_weight_col)
|
240
241
|
|
242
|
+
def _get_rand_id(self) -> str:
|
243
|
+
"""
|
244
|
+
Generate random id to be used in sproc and stage names.
|
245
|
+
|
246
|
+
Returns:
|
247
|
+
Random id string usable in sproc, table, and stage names.
|
248
|
+
"""
|
249
|
+
return str(uuid4()).replace("-", "_").upper()
|
250
|
+
|
241
251
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
242
252
|
"""
|
243
253
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -316,7 +326,7 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
316
326
|
cp.dump(self._sklearn_object, local_transform_file)
|
317
327
|
|
318
328
|
# Create temp stage to run fit.
|
319
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
329
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
320
330
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
321
331
|
SqlResultValidator(
|
322
332
|
session=session,
|
@@ -329,11 +339,12 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
329
339
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
330
340
|
).validate()
|
331
341
|
|
332
|
-
|
342
|
+
# Use posixpath to construct stage paths
|
343
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
344
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
333
345
|
local_result_file_name = get_temp_file_path()
|
334
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
335
346
|
|
336
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
347
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
337
348
|
statement_params = telemetry.get_function_usage_statement_params(
|
338
349
|
project=_PROJECT,
|
339
350
|
subproject=_SUBPROJECT,
|
@@ -359,6 +370,7 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
359
370
|
replace=True,
|
360
371
|
session=session,
|
361
372
|
statement_params=statement_params,
|
373
|
+
anonymous=True
|
362
374
|
)
|
363
375
|
def fit_wrapper_sproc(
|
364
376
|
session: Session,
|
@@ -367,7 +379,8 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
367
379
|
stage_result_file_name: str,
|
368
380
|
input_cols: List[str],
|
369
381
|
label_cols: List[str],
|
370
|
-
sample_weight_col: Optional[str]
|
382
|
+
sample_weight_col: Optional[str],
|
383
|
+
statement_params: Dict[str, str]
|
371
384
|
) -> str:
|
372
385
|
import cloudpickle as cp
|
373
386
|
import numpy as np
|
@@ -434,15 +447,15 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
434
447
|
api_calls=[Session.call],
|
435
448
|
custom_tags=dict([("autogen", True)]),
|
436
449
|
)
|
437
|
-
sproc_export_file_name =
|
438
|
-
|
450
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
451
|
+
session,
|
439
452
|
query,
|
440
453
|
stage_transform_file_name,
|
441
454
|
stage_result_file_name,
|
442
455
|
identifier.get_unescaped_names(self.input_cols),
|
443
456
|
identifier.get_unescaped_names(self.label_cols),
|
444
457
|
identifier.get_unescaped_names(self.sample_weight_col),
|
445
|
-
statement_params
|
458
|
+
statement_params,
|
446
459
|
)
|
447
460
|
|
448
461
|
if "|" in sproc_export_file_name:
|
@@ -452,7 +465,7 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
452
465
|
print("\n".join(fields[1:]))
|
453
466
|
|
454
467
|
session.file.get(
|
455
|
-
|
468
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
456
469
|
local_result_file_name,
|
457
470
|
statement_params=statement_params
|
458
471
|
)
|
@@ -498,7 +511,7 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
498
511
|
|
499
512
|
# Register vectorized UDF for batch inference
|
500
513
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
501
|
-
safe_id=self.
|
514
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
502
515
|
|
503
516
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
504
517
|
# will try to pickle all of self which fails.
|
@@ -590,7 +603,7 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
590
603
|
return transformed_pandas_df.to_dict("records")
|
591
604
|
|
592
605
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
593
|
-
safe_id=self.
|
606
|
+
safe_id=self._get_rand_id()
|
594
607
|
)
|
595
608
|
|
596
609
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -757,11 +770,18 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
757
770
|
Transformed dataset.
|
758
771
|
"""
|
759
772
|
if isinstance(dataset, DataFrame):
|
773
|
+
expected_type_inferred = "float"
|
774
|
+
# when it is classifier, infer the datatype from label columns
|
775
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
776
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
777
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
778
|
+
)
|
779
|
+
|
760
780
|
output_df = self._batch_inference(
|
761
781
|
dataset=dataset,
|
762
782
|
inference_method="predict",
|
763
783
|
expected_output_cols_list=self.output_cols,
|
764
|
-
expected_output_cols_type=
|
784
|
+
expected_output_cols_type=expected_type_inferred,
|
765
785
|
)
|
766
786
|
elif isinstance(dataset, pd.DataFrame):
|
767
787
|
output_df = self._sklearn_inference(
|
@@ -832,10 +852,10 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
832
852
|
|
833
853
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
834
854
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
835
|
-
Returns
|
855
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
836
856
|
"""
|
837
857
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
838
|
-
return []
|
858
|
+
return [output_cols_prefix]
|
839
859
|
|
840
860
|
classes = self._sklearn_object.classes_
|
841
861
|
if isinstance(classes, numpy.ndarray):
|
@@ -1060,7 +1080,7 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
1060
1080
|
cp.dump(self._sklearn_object, local_score_file)
|
1061
1081
|
|
1062
1082
|
# Create temp stage to run score.
|
1063
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1083
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1064
1084
|
session = dataset._session
|
1065
1085
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1066
1086
|
SqlResultValidator(
|
@@ -1074,8 +1094,9 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
1074
1094
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1075
1095
|
).validate()
|
1076
1096
|
|
1077
|
-
|
1078
|
-
|
1097
|
+
# Use posixpath to construct stage paths
|
1098
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1099
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1079
1100
|
statement_params = telemetry.get_function_usage_statement_params(
|
1080
1101
|
project=_PROJECT,
|
1081
1102
|
subproject=_SUBPROJECT,
|
@@ -1101,6 +1122,7 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
1101
1122
|
replace=True,
|
1102
1123
|
session=session,
|
1103
1124
|
statement_params=statement_params,
|
1125
|
+
anonymous=True
|
1104
1126
|
)
|
1105
1127
|
def score_wrapper_sproc(
|
1106
1128
|
session: Session,
|
@@ -1108,7 +1130,8 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
1108
1130
|
stage_score_file_name: str,
|
1109
1131
|
input_cols: List[str],
|
1110
1132
|
label_cols: List[str],
|
1111
|
-
sample_weight_col: Optional[str]
|
1133
|
+
sample_weight_col: Optional[str],
|
1134
|
+
statement_params: Dict[str, str]
|
1112
1135
|
) -> float:
|
1113
1136
|
import cloudpickle as cp
|
1114
1137
|
import numpy as np
|
@@ -1158,14 +1181,14 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
1158
1181
|
api_calls=[Session.call],
|
1159
1182
|
custom_tags=dict([("autogen", True)]),
|
1160
1183
|
)
|
1161
|
-
score =
|
1162
|
-
|
1184
|
+
score = score_wrapper_sproc(
|
1185
|
+
session,
|
1163
1186
|
query,
|
1164
1187
|
stage_score_file_name,
|
1165
1188
|
identifier.get_unescaped_names(self.input_cols),
|
1166
1189
|
identifier.get_unescaped_names(self.label_cols),
|
1167
1190
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1168
|
-
statement_params
|
1191
|
+
statement_params,
|
1169
1192
|
)
|
1170
1193
|
|
1171
1194
|
cleanup_temp_files([local_score_file_name])
|
@@ -1183,18 +1206,20 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
1183
1206
|
if self._sklearn_object._estimator_type == 'classifier':
|
1184
1207
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1185
1208
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1186
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1209
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1210
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1187
1211
|
# For regressor, the type of predict is float64
|
1188
1212
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1189
1213
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1190
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1191
|
-
|
1214
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1215
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1192
1216
|
for prob_func in PROB_FUNCTIONS:
|
1193
1217
|
if hasattr(self, prob_func):
|
1194
1218
|
output_cols_prefix: str = f"{prob_func}_"
|
1195
1219
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1196
1220
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1197
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1221
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1222
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1198
1223
|
|
1199
1224
|
@property
|
1200
1225
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|