snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -325,7 +327,6 @@ class LogisticRegressionCV(BaseTransformer):
|
|
325
327
|
sample_weight_col: Optional[str] = None,
|
326
328
|
) -> None:
|
327
329
|
super().__init__()
|
328
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
329
330
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
330
331
|
|
331
332
|
self._deps = list(deps)
|
@@ -361,6 +362,15 @@ class LogisticRegressionCV(BaseTransformer):
|
|
361
362
|
self.set_drop_input_cols(drop_input_cols)
|
362
363
|
self.set_sample_weight_col(sample_weight_col)
|
363
364
|
|
365
|
+
def _get_rand_id(self) -> str:
|
366
|
+
"""
|
367
|
+
Generate random id to be used in sproc and stage names.
|
368
|
+
|
369
|
+
Returns:
|
370
|
+
Random id string usable in sproc, table, and stage names.
|
371
|
+
"""
|
372
|
+
return str(uuid4()).replace("-", "_").upper()
|
373
|
+
|
364
374
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
365
375
|
"""
|
366
376
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -439,7 +449,7 @@ class LogisticRegressionCV(BaseTransformer):
|
|
439
449
|
cp.dump(self._sklearn_object, local_transform_file)
|
440
450
|
|
441
451
|
# Create temp stage to run fit.
|
442
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
452
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
443
453
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
444
454
|
SqlResultValidator(
|
445
455
|
session=session,
|
@@ -452,11 +462,12 @@ class LogisticRegressionCV(BaseTransformer):
|
|
452
462
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
453
463
|
).validate()
|
454
464
|
|
455
|
-
|
465
|
+
# Use posixpath to construct stage paths
|
466
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
467
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
456
468
|
local_result_file_name = get_temp_file_path()
|
457
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
458
469
|
|
459
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
470
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
460
471
|
statement_params = telemetry.get_function_usage_statement_params(
|
461
472
|
project=_PROJECT,
|
462
473
|
subproject=_SUBPROJECT,
|
@@ -482,6 +493,7 @@ class LogisticRegressionCV(BaseTransformer):
|
|
482
493
|
replace=True,
|
483
494
|
session=session,
|
484
495
|
statement_params=statement_params,
|
496
|
+
anonymous=True
|
485
497
|
)
|
486
498
|
def fit_wrapper_sproc(
|
487
499
|
session: Session,
|
@@ -490,7 +502,8 @@ class LogisticRegressionCV(BaseTransformer):
|
|
490
502
|
stage_result_file_name: str,
|
491
503
|
input_cols: List[str],
|
492
504
|
label_cols: List[str],
|
493
|
-
sample_weight_col: Optional[str]
|
505
|
+
sample_weight_col: Optional[str],
|
506
|
+
statement_params: Dict[str, str]
|
494
507
|
) -> str:
|
495
508
|
import cloudpickle as cp
|
496
509
|
import numpy as np
|
@@ -557,15 +570,15 @@ class LogisticRegressionCV(BaseTransformer):
|
|
557
570
|
api_calls=[Session.call],
|
558
571
|
custom_tags=dict([("autogen", True)]),
|
559
572
|
)
|
560
|
-
sproc_export_file_name =
|
561
|
-
|
573
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
574
|
+
session,
|
562
575
|
query,
|
563
576
|
stage_transform_file_name,
|
564
577
|
stage_result_file_name,
|
565
578
|
identifier.get_unescaped_names(self.input_cols),
|
566
579
|
identifier.get_unescaped_names(self.label_cols),
|
567
580
|
identifier.get_unescaped_names(self.sample_weight_col),
|
568
|
-
statement_params
|
581
|
+
statement_params,
|
569
582
|
)
|
570
583
|
|
571
584
|
if "|" in sproc_export_file_name:
|
@@ -575,7 +588,7 @@ class LogisticRegressionCV(BaseTransformer):
|
|
575
588
|
print("\n".join(fields[1:]))
|
576
589
|
|
577
590
|
session.file.get(
|
578
|
-
|
591
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
579
592
|
local_result_file_name,
|
580
593
|
statement_params=statement_params
|
581
594
|
)
|
@@ -621,7 +634,7 @@ class LogisticRegressionCV(BaseTransformer):
|
|
621
634
|
|
622
635
|
# Register vectorized UDF for batch inference
|
623
636
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
624
|
-
safe_id=self.id, method=inference_method)
|
637
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
625
638
|
|
626
639
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
627
640
|
# will try to pickle all of self which fails.
|
@@ -713,7 +726,7 @@ class LogisticRegressionCV(BaseTransformer):
|
|
713
726
|
return transformed_pandas_df.to_dict("records")
|
714
727
|
|
715
728
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
716
|
-
safe_id=self.id
|
729
|
+
safe_id=self._get_rand_id()
|
717
730
|
)
|
718
731
|
|
719
732
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -880,11 +893,18 @@ class LogisticRegressionCV(BaseTransformer):
|
|
880
893
|
Transformed dataset.
|
881
894
|
"""
|
882
895
|
if isinstance(dataset, DataFrame):
|
896
|
+
expected_type_inferred = ""
|
897
|
+
# when it is classifier, infer the datatype from label columns
|
898
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
899
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
900
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
901
|
+
)
|
902
|
+
|
883
903
|
output_df = self._batch_inference(
|
884
904
|
dataset=dataset,
|
885
905
|
inference_method="predict",
|
886
906
|
expected_output_cols_list=self.output_cols,
|
887
|
-
expected_output_cols_type=
|
907
|
+
expected_output_cols_type=expected_type_inferred,
|
888
908
|
)
|
889
909
|
elif isinstance(dataset, pd.DataFrame):
|
890
910
|
output_df = self._sklearn_inference(
|
@@ -955,10 +975,10 @@ class LogisticRegressionCV(BaseTransformer):
|
|
955
975
|
|
956
976
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
957
977
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
958
|
-
Returns
|
978
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
959
979
|
"""
|
960
980
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
961
|
-
return []
|
981
|
+
return [output_cols_prefix]
|
962
982
|
|
963
983
|
classes = self._sklearn_object.classes_
|
964
984
|
if isinstance(classes, numpy.ndarray):
|
@@ -1189,7 +1209,7 @@ class LogisticRegressionCV(BaseTransformer):
|
|
1189
1209
|
cp.dump(self._sklearn_object, local_score_file)
|
1190
1210
|
|
1191
1211
|
# Create temp stage to run score.
|
1192
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1212
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1193
1213
|
session = dataset._session
|
1194
1214
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1195
1215
|
SqlResultValidator(
|
@@ -1203,8 +1223,9 @@ class LogisticRegressionCV(BaseTransformer):
|
|
1203
1223
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1204
1224
|
).validate()
|
1205
1225
|
|
1206
|
-
|
1207
|
-
|
1226
|
+
# Use posixpath to construct stage paths
|
1227
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1228
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1208
1229
|
statement_params = telemetry.get_function_usage_statement_params(
|
1209
1230
|
project=_PROJECT,
|
1210
1231
|
subproject=_SUBPROJECT,
|
@@ -1230,6 +1251,7 @@ class LogisticRegressionCV(BaseTransformer):
|
|
1230
1251
|
replace=True,
|
1231
1252
|
session=session,
|
1232
1253
|
statement_params=statement_params,
|
1254
|
+
anonymous=True
|
1233
1255
|
)
|
1234
1256
|
def score_wrapper_sproc(
|
1235
1257
|
session: Session,
|
@@ -1237,7 +1259,8 @@ class LogisticRegressionCV(BaseTransformer):
|
|
1237
1259
|
stage_score_file_name: str,
|
1238
1260
|
input_cols: List[str],
|
1239
1261
|
label_cols: List[str],
|
1240
|
-
sample_weight_col: Optional[str]
|
1262
|
+
sample_weight_col: Optional[str],
|
1263
|
+
statement_params: Dict[str, str]
|
1241
1264
|
) -> float:
|
1242
1265
|
import cloudpickle as cp
|
1243
1266
|
import numpy as np
|
@@ -1287,14 +1310,14 @@ class LogisticRegressionCV(BaseTransformer):
|
|
1287
1310
|
api_calls=[Session.call],
|
1288
1311
|
custom_tags=dict([("autogen", True)]),
|
1289
1312
|
)
|
1290
|
-
score =
|
1291
|
-
|
1313
|
+
score = score_wrapper_sproc(
|
1314
|
+
session,
|
1292
1315
|
query,
|
1293
1316
|
stage_score_file_name,
|
1294
1317
|
identifier.get_unescaped_names(self.input_cols),
|
1295
1318
|
identifier.get_unescaped_names(self.label_cols),
|
1296
1319
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1297
|
-
statement_params
|
1320
|
+
statement_params,
|
1298
1321
|
)
|
1299
1322
|
|
1300
1323
|
cleanup_temp_files([local_score_file_name])
|
@@ -1312,18 +1335,20 @@ class LogisticRegressionCV(BaseTransformer):
|
|
1312
1335
|
if self._sklearn_object._estimator_type == 'classifier':
|
1313
1336
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1314
1337
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1315
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1338
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1339
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1316
1340
|
# For regressor, the type of predict is float64
|
1317
1341
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1318
1342
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1319
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1320
|
-
|
1343
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1344
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1321
1345
|
for prob_func in PROB_FUNCTIONS:
|
1322
1346
|
if hasattr(self, prob_func):
|
1323
1347
|
output_cols_prefix: str = f"{prob_func}_"
|
1324
1348
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1325
1349
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1326
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1350
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1351
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1327
1352
|
|
1328
1353
|
@property
|
1329
1354
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -231,7 +233,6 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
231
233
|
sample_weight_col: Optional[str] = None,
|
232
234
|
) -> None:
|
233
235
|
super().__init__()
|
234
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
235
236
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
236
237
|
|
237
238
|
self._deps = list(deps)
|
@@ -259,6 +260,15 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
259
260
|
self.set_drop_input_cols(drop_input_cols)
|
260
261
|
self.set_sample_weight_col(sample_weight_col)
|
261
262
|
|
263
|
+
def _get_rand_id(self) -> str:
|
264
|
+
"""
|
265
|
+
Generate random id to be used in sproc and stage names.
|
266
|
+
|
267
|
+
Returns:
|
268
|
+
Random id string usable in sproc, table, and stage names.
|
269
|
+
"""
|
270
|
+
return str(uuid4()).replace("-", "_").upper()
|
271
|
+
|
262
272
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
263
273
|
"""
|
264
274
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -337,7 +347,7 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
337
347
|
cp.dump(self._sklearn_object, local_transform_file)
|
338
348
|
|
339
349
|
# Create temp stage to run fit.
|
340
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
350
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
341
351
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
342
352
|
SqlResultValidator(
|
343
353
|
session=session,
|
@@ -350,11 +360,12 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
350
360
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
351
361
|
).validate()
|
352
362
|
|
353
|
-
|
363
|
+
# Use posixpath to construct stage paths
|
364
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
365
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
354
366
|
local_result_file_name = get_temp_file_path()
|
355
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
356
367
|
|
357
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
368
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
358
369
|
statement_params = telemetry.get_function_usage_statement_params(
|
359
370
|
project=_PROJECT,
|
360
371
|
subproject=_SUBPROJECT,
|
@@ -380,6 +391,7 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
380
391
|
replace=True,
|
381
392
|
session=session,
|
382
393
|
statement_params=statement_params,
|
394
|
+
anonymous=True
|
383
395
|
)
|
384
396
|
def fit_wrapper_sproc(
|
385
397
|
session: Session,
|
@@ -388,7 +400,8 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
388
400
|
stage_result_file_name: str,
|
389
401
|
input_cols: List[str],
|
390
402
|
label_cols: List[str],
|
391
|
-
sample_weight_col: Optional[str]
|
403
|
+
sample_weight_col: Optional[str],
|
404
|
+
statement_params: Dict[str, str]
|
392
405
|
) -> str:
|
393
406
|
import cloudpickle as cp
|
394
407
|
import numpy as np
|
@@ -455,15 +468,15 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
455
468
|
api_calls=[Session.call],
|
456
469
|
custom_tags=dict([("autogen", True)]),
|
457
470
|
)
|
458
|
-
sproc_export_file_name =
|
459
|
-
|
471
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
472
|
+
session,
|
460
473
|
query,
|
461
474
|
stage_transform_file_name,
|
462
475
|
stage_result_file_name,
|
463
476
|
identifier.get_unescaped_names(self.input_cols),
|
464
477
|
identifier.get_unescaped_names(self.label_cols),
|
465
478
|
identifier.get_unescaped_names(self.sample_weight_col),
|
466
|
-
statement_params
|
479
|
+
statement_params,
|
467
480
|
)
|
468
481
|
|
469
482
|
if "|" in sproc_export_file_name:
|
@@ -473,7 +486,7 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
473
486
|
print("\n".join(fields[1:]))
|
474
487
|
|
475
488
|
session.file.get(
|
476
|
-
|
489
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
477
490
|
local_result_file_name,
|
478
491
|
statement_params=statement_params
|
479
492
|
)
|
@@ -519,7 +532,7 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
519
532
|
|
520
533
|
# Register vectorized UDF for batch inference
|
521
534
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
522
|
-
safe_id=self.id, method=inference_method)
|
535
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
523
536
|
|
524
537
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
525
538
|
# will try to pickle all of self which fails.
|
@@ -611,7 +624,7 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
611
624
|
return transformed_pandas_df.to_dict("records")
|
612
625
|
|
613
626
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
614
|
-
safe_id=self.id
|
627
|
+
safe_id=self._get_rand_id()
|
615
628
|
)
|
616
629
|
|
617
630
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -778,11 +791,18 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
778
791
|
Transformed dataset.
|
779
792
|
"""
|
780
793
|
if isinstance(dataset, DataFrame):
|
794
|
+
expected_type_inferred = "float"
|
795
|
+
# when it is classifier, infer the datatype from label columns
|
796
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
797
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
798
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
799
|
+
)
|
800
|
+
|
781
801
|
output_df = self._batch_inference(
|
782
802
|
dataset=dataset,
|
783
803
|
inference_method="predict",
|
784
804
|
expected_output_cols_list=self.output_cols,
|
785
|
-
expected_output_cols_type=
|
805
|
+
expected_output_cols_type=expected_type_inferred,
|
786
806
|
)
|
787
807
|
elif isinstance(dataset, pd.DataFrame):
|
788
808
|
output_df = self._sklearn_inference(
|
@@ -853,10 +873,10 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
853
873
|
|
854
874
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
855
875
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
856
|
-
Returns
|
876
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
857
877
|
"""
|
858
878
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
859
|
-
return []
|
879
|
+
return [output_cols_prefix]
|
860
880
|
|
861
881
|
classes = self._sklearn_object.classes_
|
862
882
|
if isinstance(classes, numpy.ndarray):
|
@@ -1081,7 +1101,7 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
1081
1101
|
cp.dump(self._sklearn_object, local_score_file)
|
1082
1102
|
|
1083
1103
|
# Create temp stage to run score.
|
1084
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1104
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1085
1105
|
session = dataset._session
|
1086
1106
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1087
1107
|
SqlResultValidator(
|
@@ -1095,8 +1115,9 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
1095
1115
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1096
1116
|
).validate()
|
1097
1117
|
|
1098
|
-
|
1099
|
-
|
1118
|
+
# Use posixpath to construct stage paths
|
1119
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1120
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1100
1121
|
statement_params = telemetry.get_function_usage_statement_params(
|
1101
1122
|
project=_PROJECT,
|
1102
1123
|
subproject=_SUBPROJECT,
|
@@ -1122,6 +1143,7 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
1122
1143
|
replace=True,
|
1123
1144
|
session=session,
|
1124
1145
|
statement_params=statement_params,
|
1146
|
+
anonymous=True
|
1125
1147
|
)
|
1126
1148
|
def score_wrapper_sproc(
|
1127
1149
|
session: Session,
|
@@ -1129,7 +1151,8 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
1129
1151
|
stage_score_file_name: str,
|
1130
1152
|
input_cols: List[str],
|
1131
1153
|
label_cols: List[str],
|
1132
|
-
sample_weight_col: Optional[str]
|
1154
|
+
sample_weight_col: Optional[str],
|
1155
|
+
statement_params: Dict[str, str]
|
1133
1156
|
) -> float:
|
1134
1157
|
import cloudpickle as cp
|
1135
1158
|
import numpy as np
|
@@ -1179,14 +1202,14 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
1179
1202
|
api_calls=[Session.call],
|
1180
1203
|
custom_tags=dict([("autogen", True)]),
|
1181
1204
|
)
|
1182
|
-
score =
|
1183
|
-
|
1205
|
+
score = score_wrapper_sproc(
|
1206
|
+
session,
|
1184
1207
|
query,
|
1185
1208
|
stage_score_file_name,
|
1186
1209
|
identifier.get_unescaped_names(self.input_cols),
|
1187
1210
|
identifier.get_unescaped_names(self.label_cols),
|
1188
1211
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1189
|
-
statement_params
|
1212
|
+
statement_params,
|
1190
1213
|
)
|
1191
1214
|
|
1192
1215
|
cleanup_temp_files([local_score_file_name])
|
@@ -1204,18 +1227,20 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
1204
1227
|
if self._sklearn_object._estimator_type == 'classifier':
|
1205
1228
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1206
1229
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1207
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1230
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1231
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1208
1232
|
# For regressor, the type of predict is float64
|
1209
1233
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1210
1234
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1211
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1212
|
-
|
1235
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1236
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1213
1237
|
for prob_func in PROB_FUNCTIONS:
|
1214
1238
|
if hasattr(self, prob_func):
|
1215
1239
|
output_cols_prefix: str = f"{prob_func}_"
|
1216
1240
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1217
1241
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1218
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1242
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1243
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1219
1244
|
|
1220
1245
|
@property
|
1221
1246
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|