snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -264,7 +266,6 @@ class LassoCV(BaseTransformer):
|
|
264
266
|
sample_weight_col: Optional[str] = None,
|
265
267
|
) -> None:
|
266
268
|
super().__init__()
|
267
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
268
269
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
269
270
|
|
270
271
|
self._deps = list(deps)
|
@@ -297,6 +298,15 @@ class LassoCV(BaseTransformer):
|
|
297
298
|
self.set_drop_input_cols(drop_input_cols)
|
298
299
|
self.set_sample_weight_col(sample_weight_col)
|
299
300
|
|
301
|
+
def _get_rand_id(self) -> str:
|
302
|
+
"""
|
303
|
+
Generate random id to be used in sproc and stage names.
|
304
|
+
|
305
|
+
Returns:
|
306
|
+
Random id string usable in sproc, table, and stage names.
|
307
|
+
"""
|
308
|
+
return str(uuid4()).replace("-", "_").upper()
|
309
|
+
|
300
310
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
301
311
|
"""
|
302
312
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -375,7 +385,7 @@ class LassoCV(BaseTransformer):
|
|
375
385
|
cp.dump(self._sklearn_object, local_transform_file)
|
376
386
|
|
377
387
|
# Create temp stage to run fit.
|
378
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
388
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
379
389
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
380
390
|
SqlResultValidator(
|
381
391
|
session=session,
|
@@ -388,11 +398,12 @@ class LassoCV(BaseTransformer):
|
|
388
398
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
389
399
|
).validate()
|
390
400
|
|
391
|
-
|
401
|
+
# Use posixpath to construct stage paths
|
402
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
403
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
392
404
|
local_result_file_name = get_temp_file_path()
|
393
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
394
405
|
|
395
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
406
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
396
407
|
statement_params = telemetry.get_function_usage_statement_params(
|
397
408
|
project=_PROJECT,
|
398
409
|
subproject=_SUBPROJECT,
|
@@ -418,6 +429,7 @@ class LassoCV(BaseTransformer):
|
|
418
429
|
replace=True,
|
419
430
|
session=session,
|
420
431
|
statement_params=statement_params,
|
432
|
+
anonymous=True
|
421
433
|
)
|
422
434
|
def fit_wrapper_sproc(
|
423
435
|
session: Session,
|
@@ -426,7 +438,8 @@ class LassoCV(BaseTransformer):
|
|
426
438
|
stage_result_file_name: str,
|
427
439
|
input_cols: List[str],
|
428
440
|
label_cols: List[str],
|
429
|
-
sample_weight_col: Optional[str]
|
441
|
+
sample_weight_col: Optional[str],
|
442
|
+
statement_params: Dict[str, str]
|
430
443
|
) -> str:
|
431
444
|
import cloudpickle as cp
|
432
445
|
import numpy as np
|
@@ -493,15 +506,15 @@ class LassoCV(BaseTransformer):
|
|
493
506
|
api_calls=[Session.call],
|
494
507
|
custom_tags=dict([("autogen", True)]),
|
495
508
|
)
|
496
|
-
sproc_export_file_name =
|
497
|
-
|
509
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
510
|
+
session,
|
498
511
|
query,
|
499
512
|
stage_transform_file_name,
|
500
513
|
stage_result_file_name,
|
501
514
|
identifier.get_unescaped_names(self.input_cols),
|
502
515
|
identifier.get_unescaped_names(self.label_cols),
|
503
516
|
identifier.get_unescaped_names(self.sample_weight_col),
|
504
|
-
statement_params
|
517
|
+
statement_params,
|
505
518
|
)
|
506
519
|
|
507
520
|
if "|" in sproc_export_file_name:
|
@@ -511,7 +524,7 @@ class LassoCV(BaseTransformer):
|
|
511
524
|
print("\n".join(fields[1:]))
|
512
525
|
|
513
526
|
session.file.get(
|
514
|
-
|
527
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
515
528
|
local_result_file_name,
|
516
529
|
statement_params=statement_params
|
517
530
|
)
|
@@ -557,7 +570,7 @@ class LassoCV(BaseTransformer):
|
|
557
570
|
|
558
571
|
# Register vectorized UDF for batch inference
|
559
572
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
560
|
-
safe_id=self.id, method=inference_method)
|
573
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
561
574
|
|
562
575
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
563
576
|
# will try to pickle all of self which fails.
|
@@ -649,7 +662,7 @@ class LassoCV(BaseTransformer):
|
|
649
662
|
return transformed_pandas_df.to_dict("records")
|
650
663
|
|
651
664
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
652
|
-
safe_id=self.id
|
665
|
+
safe_id=self._get_rand_id()
|
653
666
|
)
|
654
667
|
|
655
668
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -816,11 +829,18 @@ class LassoCV(BaseTransformer):
|
|
816
829
|
Transformed dataset.
|
817
830
|
"""
|
818
831
|
if isinstance(dataset, DataFrame):
|
832
|
+
expected_type_inferred = "float"
|
833
|
+
# when it is classifier, infer the datatype from label columns
|
834
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
835
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
836
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
837
|
+
)
|
838
|
+
|
819
839
|
output_df = self._batch_inference(
|
820
840
|
dataset=dataset,
|
821
841
|
inference_method="predict",
|
822
842
|
expected_output_cols_list=self.output_cols,
|
823
|
-
expected_output_cols_type=
|
843
|
+
expected_output_cols_type=expected_type_inferred,
|
824
844
|
)
|
825
845
|
elif isinstance(dataset, pd.DataFrame):
|
826
846
|
output_df = self._sklearn_inference(
|
@@ -891,10 +911,10 @@ class LassoCV(BaseTransformer):
|
|
891
911
|
|
892
912
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
893
913
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
894
|
-
Returns
|
914
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
895
915
|
"""
|
896
916
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
897
|
-
return []
|
917
|
+
return [output_cols_prefix]
|
898
918
|
|
899
919
|
classes = self._sklearn_object.classes_
|
900
920
|
if isinstance(classes, numpy.ndarray):
|
@@ -1119,7 +1139,7 @@ class LassoCV(BaseTransformer):
|
|
1119
1139
|
cp.dump(self._sklearn_object, local_score_file)
|
1120
1140
|
|
1121
1141
|
# Create temp stage to run score.
|
1122
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1142
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1123
1143
|
session = dataset._session
|
1124
1144
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1125
1145
|
SqlResultValidator(
|
@@ -1133,8 +1153,9 @@ class LassoCV(BaseTransformer):
|
|
1133
1153
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1134
1154
|
).validate()
|
1135
1155
|
|
1136
|
-
|
1137
|
-
|
1156
|
+
# Use posixpath to construct stage paths
|
1157
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1158
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1138
1159
|
statement_params = telemetry.get_function_usage_statement_params(
|
1139
1160
|
project=_PROJECT,
|
1140
1161
|
subproject=_SUBPROJECT,
|
@@ -1160,6 +1181,7 @@ class LassoCV(BaseTransformer):
|
|
1160
1181
|
replace=True,
|
1161
1182
|
session=session,
|
1162
1183
|
statement_params=statement_params,
|
1184
|
+
anonymous=True
|
1163
1185
|
)
|
1164
1186
|
def score_wrapper_sproc(
|
1165
1187
|
session: Session,
|
@@ -1167,7 +1189,8 @@ class LassoCV(BaseTransformer):
|
|
1167
1189
|
stage_score_file_name: str,
|
1168
1190
|
input_cols: List[str],
|
1169
1191
|
label_cols: List[str],
|
1170
|
-
sample_weight_col: Optional[str]
|
1192
|
+
sample_weight_col: Optional[str],
|
1193
|
+
statement_params: Dict[str, str]
|
1171
1194
|
) -> float:
|
1172
1195
|
import cloudpickle as cp
|
1173
1196
|
import numpy as np
|
@@ -1217,14 +1240,14 @@ class LassoCV(BaseTransformer):
|
|
1217
1240
|
api_calls=[Session.call],
|
1218
1241
|
custom_tags=dict([("autogen", True)]),
|
1219
1242
|
)
|
1220
|
-
score =
|
1221
|
-
|
1243
|
+
score = score_wrapper_sproc(
|
1244
|
+
session,
|
1222
1245
|
query,
|
1223
1246
|
stage_score_file_name,
|
1224
1247
|
identifier.get_unescaped_names(self.input_cols),
|
1225
1248
|
identifier.get_unescaped_names(self.label_cols),
|
1226
1249
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1227
|
-
statement_params
|
1250
|
+
statement_params,
|
1228
1251
|
)
|
1229
1252
|
|
1230
1253
|
cleanup_temp_files([local_score_file_name])
|
@@ -1242,18 +1265,20 @@ class LassoCV(BaseTransformer):
|
|
1242
1265
|
if self._sklearn_object._estimator_type == 'classifier':
|
1243
1266
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1244
1267
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1245
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1268
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1269
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1246
1270
|
# For regressor, the type of predict is float64
|
1247
1271
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1248
1272
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1249
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1250
|
-
|
1273
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1274
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1251
1275
|
for prob_func in PROB_FUNCTIONS:
|
1252
1276
|
if hasattr(self, prob_func):
|
1253
1277
|
output_cols_prefix: str = f"{prob_func}_"
|
1254
1278
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1255
1279
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1256
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1280
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1281
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1257
1282
|
|
1258
1283
|
@property
|
1259
1284
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -258,7 +260,6 @@ class LassoLars(BaseTransformer):
|
|
258
260
|
sample_weight_col: Optional[str] = None,
|
259
261
|
) -> None:
|
260
262
|
super().__init__()
|
261
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
262
263
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
263
264
|
|
264
265
|
self._deps = list(deps)
|
@@ -289,6 +290,15 @@ class LassoLars(BaseTransformer):
|
|
289
290
|
self.set_drop_input_cols(drop_input_cols)
|
290
291
|
self.set_sample_weight_col(sample_weight_col)
|
291
292
|
|
293
|
+
def _get_rand_id(self) -> str:
|
294
|
+
"""
|
295
|
+
Generate random id to be used in sproc and stage names.
|
296
|
+
|
297
|
+
Returns:
|
298
|
+
Random id string usable in sproc, table, and stage names.
|
299
|
+
"""
|
300
|
+
return str(uuid4()).replace("-", "_").upper()
|
301
|
+
|
292
302
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
293
303
|
"""
|
294
304
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -367,7 +377,7 @@ class LassoLars(BaseTransformer):
|
|
367
377
|
cp.dump(self._sklearn_object, local_transform_file)
|
368
378
|
|
369
379
|
# Create temp stage to run fit.
|
370
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
380
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
371
381
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
372
382
|
SqlResultValidator(
|
373
383
|
session=session,
|
@@ -380,11 +390,12 @@ class LassoLars(BaseTransformer):
|
|
380
390
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
381
391
|
).validate()
|
382
392
|
|
383
|
-
|
393
|
+
# Use posixpath to construct stage paths
|
394
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
395
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
384
396
|
local_result_file_name = get_temp_file_path()
|
385
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
386
397
|
|
387
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
398
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
388
399
|
statement_params = telemetry.get_function_usage_statement_params(
|
389
400
|
project=_PROJECT,
|
390
401
|
subproject=_SUBPROJECT,
|
@@ -410,6 +421,7 @@ class LassoLars(BaseTransformer):
|
|
410
421
|
replace=True,
|
411
422
|
session=session,
|
412
423
|
statement_params=statement_params,
|
424
|
+
anonymous=True
|
413
425
|
)
|
414
426
|
def fit_wrapper_sproc(
|
415
427
|
session: Session,
|
@@ -418,7 +430,8 @@ class LassoLars(BaseTransformer):
|
|
418
430
|
stage_result_file_name: str,
|
419
431
|
input_cols: List[str],
|
420
432
|
label_cols: List[str],
|
421
|
-
sample_weight_col: Optional[str]
|
433
|
+
sample_weight_col: Optional[str],
|
434
|
+
statement_params: Dict[str, str]
|
422
435
|
) -> str:
|
423
436
|
import cloudpickle as cp
|
424
437
|
import numpy as np
|
@@ -485,15 +498,15 @@ class LassoLars(BaseTransformer):
|
|
485
498
|
api_calls=[Session.call],
|
486
499
|
custom_tags=dict([("autogen", True)]),
|
487
500
|
)
|
488
|
-
sproc_export_file_name =
|
489
|
-
|
501
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
502
|
+
session,
|
490
503
|
query,
|
491
504
|
stage_transform_file_name,
|
492
505
|
stage_result_file_name,
|
493
506
|
identifier.get_unescaped_names(self.input_cols),
|
494
507
|
identifier.get_unescaped_names(self.label_cols),
|
495
508
|
identifier.get_unescaped_names(self.sample_weight_col),
|
496
|
-
statement_params
|
509
|
+
statement_params,
|
497
510
|
)
|
498
511
|
|
499
512
|
if "|" in sproc_export_file_name:
|
@@ -503,7 +516,7 @@ class LassoLars(BaseTransformer):
|
|
503
516
|
print("\n".join(fields[1:]))
|
504
517
|
|
505
518
|
session.file.get(
|
506
|
-
|
519
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
507
520
|
local_result_file_name,
|
508
521
|
statement_params=statement_params
|
509
522
|
)
|
@@ -549,7 +562,7 @@ class LassoLars(BaseTransformer):
|
|
549
562
|
|
550
563
|
# Register vectorized UDF for batch inference
|
551
564
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
552
|
-
safe_id=self.id, method=inference_method)
|
565
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
553
566
|
|
554
567
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
555
568
|
# will try to pickle all of self which fails.
|
@@ -641,7 +654,7 @@ class LassoLars(BaseTransformer):
|
|
641
654
|
return transformed_pandas_df.to_dict("records")
|
642
655
|
|
643
656
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
644
|
-
safe_id=self.id
|
657
|
+
safe_id=self._get_rand_id()
|
645
658
|
)
|
646
659
|
|
647
660
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -808,11 +821,18 @@ class LassoLars(BaseTransformer):
|
|
808
821
|
Transformed dataset.
|
809
822
|
"""
|
810
823
|
if isinstance(dataset, DataFrame):
|
824
|
+
expected_type_inferred = "float"
|
825
|
+
# when it is classifier, infer the datatype from label columns
|
826
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
827
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
828
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
829
|
+
)
|
830
|
+
|
811
831
|
output_df = self._batch_inference(
|
812
832
|
dataset=dataset,
|
813
833
|
inference_method="predict",
|
814
834
|
expected_output_cols_list=self.output_cols,
|
815
|
-
expected_output_cols_type=
|
835
|
+
expected_output_cols_type=expected_type_inferred,
|
816
836
|
)
|
817
837
|
elif isinstance(dataset, pd.DataFrame):
|
818
838
|
output_df = self._sklearn_inference(
|
@@ -883,10 +903,10 @@ class LassoLars(BaseTransformer):
|
|
883
903
|
|
884
904
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
885
905
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
886
|
-
Returns
|
906
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
887
907
|
"""
|
888
908
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
889
|
-
return []
|
909
|
+
return [output_cols_prefix]
|
890
910
|
|
891
911
|
classes = self._sklearn_object.classes_
|
892
912
|
if isinstance(classes, numpy.ndarray):
|
@@ -1111,7 +1131,7 @@ class LassoLars(BaseTransformer):
|
|
1111
1131
|
cp.dump(self._sklearn_object, local_score_file)
|
1112
1132
|
|
1113
1133
|
# Create temp stage to run score.
|
1114
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1134
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1115
1135
|
session = dataset._session
|
1116
1136
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1117
1137
|
SqlResultValidator(
|
@@ -1125,8 +1145,9 @@ class LassoLars(BaseTransformer):
|
|
1125
1145
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1126
1146
|
).validate()
|
1127
1147
|
|
1128
|
-
|
1129
|
-
|
1148
|
+
# Use posixpath to construct stage paths
|
1149
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1150
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1130
1151
|
statement_params = telemetry.get_function_usage_statement_params(
|
1131
1152
|
project=_PROJECT,
|
1132
1153
|
subproject=_SUBPROJECT,
|
@@ -1152,6 +1173,7 @@ class LassoLars(BaseTransformer):
|
|
1152
1173
|
replace=True,
|
1153
1174
|
session=session,
|
1154
1175
|
statement_params=statement_params,
|
1176
|
+
anonymous=True
|
1155
1177
|
)
|
1156
1178
|
def score_wrapper_sproc(
|
1157
1179
|
session: Session,
|
@@ -1159,7 +1181,8 @@ class LassoLars(BaseTransformer):
|
|
1159
1181
|
stage_score_file_name: str,
|
1160
1182
|
input_cols: List[str],
|
1161
1183
|
label_cols: List[str],
|
1162
|
-
sample_weight_col: Optional[str]
|
1184
|
+
sample_weight_col: Optional[str],
|
1185
|
+
statement_params: Dict[str, str]
|
1163
1186
|
) -> float:
|
1164
1187
|
import cloudpickle as cp
|
1165
1188
|
import numpy as np
|
@@ -1209,14 +1232,14 @@ class LassoLars(BaseTransformer):
|
|
1209
1232
|
api_calls=[Session.call],
|
1210
1233
|
custom_tags=dict([("autogen", True)]),
|
1211
1234
|
)
|
1212
|
-
score =
|
1213
|
-
|
1235
|
+
score = score_wrapper_sproc(
|
1236
|
+
session,
|
1214
1237
|
query,
|
1215
1238
|
stage_score_file_name,
|
1216
1239
|
identifier.get_unescaped_names(self.input_cols),
|
1217
1240
|
identifier.get_unescaped_names(self.label_cols),
|
1218
1241
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1219
|
-
statement_params
|
1242
|
+
statement_params,
|
1220
1243
|
)
|
1221
1244
|
|
1222
1245
|
cleanup_temp_files([local_score_file_name])
|
@@ -1234,18 +1257,20 @@ class LassoLars(BaseTransformer):
|
|
1234
1257
|
if self._sklearn_object._estimator_type == 'classifier':
|
1235
1258
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1236
1259
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1237
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1260
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1261
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1238
1262
|
# For regressor, the type of predict is float64
|
1239
1263
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1240
1264
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1241
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1242
|
-
|
1265
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1266
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1243
1267
|
for prob_func in PROB_FUNCTIONS:
|
1244
1268
|
if hasattr(self, prob_func):
|
1245
1269
|
output_cols_prefix: str = f"{prob_func}_"
|
1246
1270
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1247
1271
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1248
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1272
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1273
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1249
1274
|
|
1250
1275
|
@property
|
1251
1276
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|