snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
```diff
--- a/snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py
+++ b/snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -268,7 +270,6 @@ class MultiTaskElasticNetCV(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -300,6 +301,15 @@ class MultiTaskElasticNetCV(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -378,7 +388,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -391,11 +401,12 @@ class MultiTaskElasticNetCV(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -421,6 +432,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -429,7 +441,8 @@ class MultiTaskElasticNetCV(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -496,15 +509,15 @@ class MultiTaskElasticNetCV(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -514,7 +527,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -560,7 +573,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -652,7 +665,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -819,11 +832,18 @@ class MultiTaskElasticNetCV(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -894,10 +914,10 @@ class MultiTaskElasticNetCV(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1122,7 +1142,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1136,8 +1156,9 @@ class MultiTaskElasticNetCV(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1163,6 +1184,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1170,7 +1192,8 @@ class MultiTaskElasticNetCV(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1220,14 +1243,14 @@ class MultiTaskElasticNetCV(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1245,18 +1268,20 @@ class MultiTaskElasticNetCV(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
```
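The recurring edits in this file are (1) stage paths are now built with `posixpath.join` instead of `os.path.join`, and (2) the single `self.id` assigned in `__init__` is replaced by a `_get_rand_id()` helper that is called wherever a stage, table, or sproc name is constructed. The snippet below is an illustrative sketch only, not code from the package: it shows why `posixpath` matters for stage paths, using `ntpath` as a stand-in for how `os.path` behaves on a Windows client, with made-up stage and file names.

```python
# Illustrative sketch (not library code): Snowflake stage locations use "/"
# separators, while os.path.join follows the client OS and would emit "\" on
# Windows. posixpath.join produces the same "/"-separated path on every platform.
import ntpath      # behaves like os.path does on Windows, whatever OS runs this
import posixpath

stage_name = "SNOWML_TRANSFORM_ABC123"   # hypothetical temporary stage name
artifact = "model.pkl"                   # hypothetical serialized-model file name

print(ntpath.join(stage_name, artifact))     # SNOWML_TRANSFORM_ABC123\model.pkl  (not a usable stage path)
print(posixpath.join(stage_name, artifact))  # SNOWML_TRANSFORM_ABC123/model.pkl  (portable)
```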
```diff
--- a/snowflake/ml/modeling/linear_model/multi_task_lasso.py
+++ b/snowflake/ml/modeling/linear_model/multi_task_lasso.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -224,7 +226,6 @@ class MultiTaskLasso(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -251,6 +252,15 @@ class MultiTaskLasso(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -329,7 +339,7 @@ class MultiTaskLasso(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -342,11 +352,12 @@ class MultiTaskLasso(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -372,6 +383,7 @@ class MultiTaskLasso(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -380,7 +392,8 @@ class MultiTaskLasso(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -447,15 +460,15 @@ class MultiTaskLasso(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -465,7 +478,7 @@ class MultiTaskLasso(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -511,7 +524,7 @@ class MultiTaskLasso(BaseTransformer):
 
         # Register vectorized UDF for batch inference
        batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -603,7 +616,7 @@ class MultiTaskLasso(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -770,11 +783,18 @@ class MultiTaskLasso(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -845,10 +865,10 @@ class MultiTaskLasso(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1073,7 +1093,7 @@ class MultiTaskLasso(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1087,8 +1107,9 @@ class MultiTaskLasso(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1114,6 +1135,7 @@ class MultiTaskLasso(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1121,7 +1143,8 @@ class MultiTaskLasso(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1171,14 +1194,14 @@ class MultiTaskLasso(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
        )
 
         cleanup_temp_files([local_score_file_name])
@@ -1196,18 +1219,20 @@ class MultiTaskLasso(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
```
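multi_task_lasso.py receives the same generated change as multi_task_elastic_net_cv.py above, and the identical +51/−26 counts across the modeling wrappers in the file list suggest the rest of those files were regenerated the same way. The other recurring change is the new `_get_rand_id()` helper: instead of one id stored on the estimator at construction time, a fresh UUID-derived identifier is generated at every point where a stage, table, or sproc name is built. The sketch below is a standalone illustration, not the library's code; the helper body is taken from the diff, while the name-construction lines mirror the templates in the diff but run outside the estimator class.

```python
# Standalone sketch; the helper body matches the diff above, the usage lines are
# illustrative only.
from uuid import uuid4

def _get_rand_id() -> str:
    """Generate a random id usable in sproc, table, and stage names."""
    return str(uuid4()).replace("-", "_").upper()

# Each call yields a distinct identifier, so repeated fit()/score() calls on the
# same estimator no longer reuse a single id for their temporary stage and sproc names.
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id())
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=_get_rand_id())
print(transform_stage_name)
print(fit_sproc_name)
```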