snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -260,7 +262,6 @@ class LassoLarsCV(BaseTransformer):
|
|
260
262
|
sample_weight_col: Optional[str] = None,
|
261
263
|
) -> None:
|
262
264
|
super().__init__()
|
263
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
264
265
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
265
266
|
|
266
267
|
self._deps = list(deps)
|
@@ -290,6 +291,15 @@ class LassoLarsCV(BaseTransformer):
|
|
290
291
|
self.set_drop_input_cols(drop_input_cols)
|
291
292
|
self.set_sample_weight_col(sample_weight_col)
|
292
293
|
|
294
|
+
def _get_rand_id(self) -> str:
|
295
|
+
"""
|
296
|
+
Generate random id to be used in sproc and stage names.
|
297
|
+
|
298
|
+
Returns:
|
299
|
+
Random id string usable in sproc, table, and stage names.
|
300
|
+
"""
|
301
|
+
return str(uuid4()).replace("-", "_").upper()
|
302
|
+
|
293
303
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
294
304
|
"""
|
295
305
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -368,7 +378,7 @@ class LassoLarsCV(BaseTransformer):
|
|
368
378
|
cp.dump(self._sklearn_object, local_transform_file)
|
369
379
|
|
370
380
|
# Create temp stage to run fit.
|
371
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
381
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
372
382
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
373
383
|
SqlResultValidator(
|
374
384
|
session=session,
|
@@ -381,11 +391,12 @@ class LassoLarsCV(BaseTransformer):
|
|
381
391
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
382
392
|
).validate()
|
383
393
|
|
384
|
-
|
394
|
+
# Use posixpath to construct stage paths
|
395
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
396
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
385
397
|
local_result_file_name = get_temp_file_path()
|
386
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
387
398
|
|
388
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
399
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
389
400
|
statement_params = telemetry.get_function_usage_statement_params(
|
390
401
|
project=_PROJECT,
|
391
402
|
subproject=_SUBPROJECT,
|
@@ -411,6 +422,7 @@ class LassoLarsCV(BaseTransformer):
|
|
411
422
|
replace=True,
|
412
423
|
session=session,
|
413
424
|
statement_params=statement_params,
|
425
|
+
anonymous=True
|
414
426
|
)
|
415
427
|
def fit_wrapper_sproc(
|
416
428
|
session: Session,
|
@@ -419,7 +431,8 @@ class LassoLarsCV(BaseTransformer):
|
|
419
431
|
stage_result_file_name: str,
|
420
432
|
input_cols: List[str],
|
421
433
|
label_cols: List[str],
|
422
|
-
sample_weight_col: Optional[str]
|
434
|
+
sample_weight_col: Optional[str],
|
435
|
+
statement_params: Dict[str, str]
|
423
436
|
) -> str:
|
424
437
|
import cloudpickle as cp
|
425
438
|
import numpy as np
|
@@ -486,15 +499,15 @@ class LassoLarsCV(BaseTransformer):
|
|
486
499
|
api_calls=[Session.call],
|
487
500
|
custom_tags=dict([("autogen", True)]),
|
488
501
|
)
|
489
|
-
sproc_export_file_name =
|
490
|
-
|
502
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
503
|
+
session,
|
491
504
|
query,
|
492
505
|
stage_transform_file_name,
|
493
506
|
stage_result_file_name,
|
494
507
|
identifier.get_unescaped_names(self.input_cols),
|
495
508
|
identifier.get_unescaped_names(self.label_cols),
|
496
509
|
identifier.get_unescaped_names(self.sample_weight_col),
|
497
|
-
statement_params
|
510
|
+
statement_params,
|
498
511
|
)
|
499
512
|
|
500
513
|
if "|" in sproc_export_file_name:
|
@@ -504,7 +517,7 @@ class LassoLarsCV(BaseTransformer):
|
|
504
517
|
print("\n".join(fields[1:]))
|
505
518
|
|
506
519
|
session.file.get(
|
507
|
-
|
520
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
508
521
|
local_result_file_name,
|
509
522
|
statement_params=statement_params
|
510
523
|
)
|
@@ -550,7 +563,7 @@ class LassoLarsCV(BaseTransformer):
|
|
550
563
|
|
551
564
|
# Register vectorized UDF for batch inference
|
552
565
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
553
|
-
safe_id=self.
|
566
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
554
567
|
|
555
568
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
556
569
|
# will try to pickle all of self which fails.
|
@@ -642,7 +655,7 @@ class LassoLarsCV(BaseTransformer):
|
|
642
655
|
return transformed_pandas_df.to_dict("records")
|
643
656
|
|
644
657
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
645
|
-
safe_id=self.
|
658
|
+
safe_id=self._get_rand_id()
|
646
659
|
)
|
647
660
|
|
648
661
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -809,11 +822,18 @@ class LassoLarsCV(BaseTransformer):
|
|
809
822
|
Transformed dataset.
|
810
823
|
"""
|
811
824
|
if isinstance(dataset, DataFrame):
|
825
|
+
expected_type_inferred = "float"
|
826
|
+
# when it is classifier, infer the datatype from label columns
|
827
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
828
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
829
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
830
|
+
)
|
831
|
+
|
812
832
|
output_df = self._batch_inference(
|
813
833
|
dataset=dataset,
|
814
834
|
inference_method="predict",
|
815
835
|
expected_output_cols_list=self.output_cols,
|
816
|
-
expected_output_cols_type=
|
836
|
+
expected_output_cols_type=expected_type_inferred,
|
817
837
|
)
|
818
838
|
elif isinstance(dataset, pd.DataFrame):
|
819
839
|
output_df = self._sklearn_inference(
|
@@ -884,10 +904,10 @@ class LassoLarsCV(BaseTransformer):
|
|
884
904
|
|
885
905
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
886
906
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
887
|
-
Returns
|
907
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
888
908
|
"""
|
889
909
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
890
|
-
return []
|
910
|
+
return [output_cols_prefix]
|
891
911
|
|
892
912
|
classes = self._sklearn_object.classes_
|
893
913
|
if isinstance(classes, numpy.ndarray):
|
@@ -1112,7 +1132,7 @@ class LassoLarsCV(BaseTransformer):
|
|
1112
1132
|
cp.dump(self._sklearn_object, local_score_file)
|
1113
1133
|
|
1114
1134
|
# Create temp stage to run score.
|
1115
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1135
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1116
1136
|
session = dataset._session
|
1117
1137
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1118
1138
|
SqlResultValidator(
|
@@ -1126,8 +1146,9 @@ class LassoLarsCV(BaseTransformer):
|
|
1126
1146
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1127
1147
|
).validate()
|
1128
1148
|
|
1129
|
-
|
1130
|
-
|
1149
|
+
# Use posixpath to construct stage paths
|
1150
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1151
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1131
1152
|
statement_params = telemetry.get_function_usage_statement_params(
|
1132
1153
|
project=_PROJECT,
|
1133
1154
|
subproject=_SUBPROJECT,
|
@@ -1153,6 +1174,7 @@ class LassoLarsCV(BaseTransformer):
|
|
1153
1174
|
replace=True,
|
1154
1175
|
session=session,
|
1155
1176
|
statement_params=statement_params,
|
1177
|
+
anonymous=True
|
1156
1178
|
)
|
1157
1179
|
def score_wrapper_sproc(
|
1158
1180
|
session: Session,
|
@@ -1160,7 +1182,8 @@ class LassoLarsCV(BaseTransformer):
|
|
1160
1182
|
stage_score_file_name: str,
|
1161
1183
|
input_cols: List[str],
|
1162
1184
|
label_cols: List[str],
|
1163
|
-
sample_weight_col: Optional[str]
|
1185
|
+
sample_weight_col: Optional[str],
|
1186
|
+
statement_params: Dict[str, str]
|
1164
1187
|
) -> float:
|
1165
1188
|
import cloudpickle as cp
|
1166
1189
|
import numpy as np
|
@@ -1210,14 +1233,14 @@ class LassoLarsCV(BaseTransformer):
|
|
1210
1233
|
api_calls=[Session.call],
|
1211
1234
|
custom_tags=dict([("autogen", True)]),
|
1212
1235
|
)
|
1213
|
-
score =
|
1214
|
-
|
1236
|
+
score = score_wrapper_sproc(
|
1237
|
+
session,
|
1215
1238
|
query,
|
1216
1239
|
stage_score_file_name,
|
1217
1240
|
identifier.get_unescaped_names(self.input_cols),
|
1218
1241
|
identifier.get_unescaped_names(self.label_cols),
|
1219
1242
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1220
|
-
statement_params
|
1243
|
+
statement_params,
|
1221
1244
|
)
|
1222
1245
|
|
1223
1246
|
cleanup_temp_files([local_score_file_name])
|
@@ -1235,18 +1258,20 @@ class LassoLarsCV(BaseTransformer):
|
|
1235
1258
|
if self._sklearn_object._estimator_type == 'classifier':
|
1236
1259
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1237
1260
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1238
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1261
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1262
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1239
1263
|
# For regressor, the type of predict is float64
|
1240
1264
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1241
1265
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1242
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1243
|
-
|
1266
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1267
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1244
1268
|
for prob_func in PROB_FUNCTIONS:
|
1245
1269
|
if hasattr(self, prob_func):
|
1246
1270
|
output_cols_prefix: str = f"{prob_func}_"
|
1247
1271
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1248
1272
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1249
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1273
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1274
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1250
1275
|
|
1251
1276
|
@property
|
1252
1277
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -244,7 +246,6 @@ class LassoLarsIC(BaseTransformer):
|
|
244
246
|
sample_weight_col: Optional[str] = None,
|
245
247
|
) -> None:
|
246
248
|
super().__init__()
|
247
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
248
249
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
249
250
|
|
250
251
|
self._deps = list(deps)
|
@@ -273,6 +274,15 @@ class LassoLarsIC(BaseTransformer):
|
|
273
274
|
self.set_drop_input_cols(drop_input_cols)
|
274
275
|
self.set_sample_weight_col(sample_weight_col)
|
275
276
|
|
277
|
+
def _get_rand_id(self) -> str:
|
278
|
+
"""
|
279
|
+
Generate random id to be used in sproc and stage names.
|
280
|
+
|
281
|
+
Returns:
|
282
|
+
Random id string usable in sproc, table, and stage names.
|
283
|
+
"""
|
284
|
+
return str(uuid4()).replace("-", "_").upper()
|
285
|
+
|
276
286
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
277
287
|
"""
|
278
288
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -351,7 +361,7 @@ class LassoLarsIC(BaseTransformer):
|
|
351
361
|
cp.dump(self._sklearn_object, local_transform_file)
|
352
362
|
|
353
363
|
# Create temp stage to run fit.
|
354
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
364
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
355
365
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
356
366
|
SqlResultValidator(
|
357
367
|
session=session,
|
@@ -364,11 +374,12 @@ class LassoLarsIC(BaseTransformer):
|
|
364
374
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
365
375
|
).validate()
|
366
376
|
|
367
|
-
|
377
|
+
# Use posixpath to construct stage paths
|
378
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
379
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
368
380
|
local_result_file_name = get_temp_file_path()
|
369
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
370
381
|
|
371
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
382
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
372
383
|
statement_params = telemetry.get_function_usage_statement_params(
|
373
384
|
project=_PROJECT,
|
374
385
|
subproject=_SUBPROJECT,
|
@@ -394,6 +405,7 @@ class LassoLarsIC(BaseTransformer):
|
|
394
405
|
replace=True,
|
395
406
|
session=session,
|
396
407
|
statement_params=statement_params,
|
408
|
+
anonymous=True
|
397
409
|
)
|
398
410
|
def fit_wrapper_sproc(
|
399
411
|
session: Session,
|
@@ -402,7 +414,8 @@ class LassoLarsIC(BaseTransformer):
|
|
402
414
|
stage_result_file_name: str,
|
403
415
|
input_cols: List[str],
|
404
416
|
label_cols: List[str],
|
405
|
-
sample_weight_col: Optional[str]
|
417
|
+
sample_weight_col: Optional[str],
|
418
|
+
statement_params: Dict[str, str]
|
406
419
|
) -> str:
|
407
420
|
import cloudpickle as cp
|
408
421
|
import numpy as np
|
@@ -469,15 +482,15 @@ class LassoLarsIC(BaseTransformer):
|
|
469
482
|
api_calls=[Session.call],
|
470
483
|
custom_tags=dict([("autogen", True)]),
|
471
484
|
)
|
472
|
-
sproc_export_file_name =
|
473
|
-
|
485
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
486
|
+
session,
|
474
487
|
query,
|
475
488
|
stage_transform_file_name,
|
476
489
|
stage_result_file_name,
|
477
490
|
identifier.get_unescaped_names(self.input_cols),
|
478
491
|
identifier.get_unescaped_names(self.label_cols),
|
479
492
|
identifier.get_unescaped_names(self.sample_weight_col),
|
480
|
-
statement_params
|
493
|
+
statement_params,
|
481
494
|
)
|
482
495
|
|
483
496
|
if "|" in sproc_export_file_name:
|
@@ -487,7 +500,7 @@ class LassoLarsIC(BaseTransformer):
|
|
487
500
|
print("\n".join(fields[1:]))
|
488
501
|
|
489
502
|
session.file.get(
|
490
|
-
|
503
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
491
504
|
local_result_file_name,
|
492
505
|
statement_params=statement_params
|
493
506
|
)
|
@@ -533,7 +546,7 @@ class LassoLarsIC(BaseTransformer):
|
|
533
546
|
|
534
547
|
# Register vectorized UDF for batch inference
|
535
548
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
536
|
-
safe_id=self.
|
549
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
537
550
|
|
538
551
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
539
552
|
# will try to pickle all of self which fails.
|
@@ -625,7 +638,7 @@ class LassoLarsIC(BaseTransformer):
|
|
625
638
|
return transformed_pandas_df.to_dict("records")
|
626
639
|
|
627
640
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
628
|
-
safe_id=self.
|
641
|
+
safe_id=self._get_rand_id()
|
629
642
|
)
|
630
643
|
|
631
644
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -792,11 +805,18 @@ class LassoLarsIC(BaseTransformer):
|
|
792
805
|
Transformed dataset.
|
793
806
|
"""
|
794
807
|
if isinstance(dataset, DataFrame):
|
808
|
+
expected_type_inferred = "float"
|
809
|
+
# when it is classifier, infer the datatype from label columns
|
810
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
811
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
812
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
813
|
+
)
|
814
|
+
|
795
815
|
output_df = self._batch_inference(
|
796
816
|
dataset=dataset,
|
797
817
|
inference_method="predict",
|
798
818
|
expected_output_cols_list=self.output_cols,
|
799
|
-
expected_output_cols_type=
|
819
|
+
expected_output_cols_type=expected_type_inferred,
|
800
820
|
)
|
801
821
|
elif isinstance(dataset, pd.DataFrame):
|
802
822
|
output_df = self._sklearn_inference(
|
@@ -867,10 +887,10 @@ class LassoLarsIC(BaseTransformer):
|
|
867
887
|
|
868
888
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
869
889
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
870
|
-
Returns
|
890
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
871
891
|
"""
|
872
892
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
873
|
-
return []
|
893
|
+
return [output_cols_prefix]
|
874
894
|
|
875
895
|
classes = self._sklearn_object.classes_
|
876
896
|
if isinstance(classes, numpy.ndarray):
|
@@ -1095,7 +1115,7 @@ class LassoLarsIC(BaseTransformer):
|
|
1095
1115
|
cp.dump(self._sklearn_object, local_score_file)
|
1096
1116
|
|
1097
1117
|
# Create temp stage to run score.
|
1098
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1118
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1099
1119
|
session = dataset._session
|
1100
1120
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1101
1121
|
SqlResultValidator(
|
@@ -1109,8 +1129,9 @@ class LassoLarsIC(BaseTransformer):
|
|
1109
1129
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1110
1130
|
).validate()
|
1111
1131
|
|
1112
|
-
|
1113
|
-
|
1132
|
+
# Use posixpath to construct stage paths
|
1133
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1134
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1114
1135
|
statement_params = telemetry.get_function_usage_statement_params(
|
1115
1136
|
project=_PROJECT,
|
1116
1137
|
subproject=_SUBPROJECT,
|
@@ -1136,6 +1157,7 @@ class LassoLarsIC(BaseTransformer):
|
|
1136
1157
|
replace=True,
|
1137
1158
|
session=session,
|
1138
1159
|
statement_params=statement_params,
|
1160
|
+
anonymous=True
|
1139
1161
|
)
|
1140
1162
|
def score_wrapper_sproc(
|
1141
1163
|
session: Session,
|
@@ -1143,7 +1165,8 @@ class LassoLarsIC(BaseTransformer):
|
|
1143
1165
|
stage_score_file_name: str,
|
1144
1166
|
input_cols: List[str],
|
1145
1167
|
label_cols: List[str],
|
1146
|
-
sample_weight_col: Optional[str]
|
1168
|
+
sample_weight_col: Optional[str],
|
1169
|
+
statement_params: Dict[str, str]
|
1147
1170
|
) -> float:
|
1148
1171
|
import cloudpickle as cp
|
1149
1172
|
import numpy as np
|
@@ -1193,14 +1216,14 @@ class LassoLarsIC(BaseTransformer):
|
|
1193
1216
|
api_calls=[Session.call],
|
1194
1217
|
custom_tags=dict([("autogen", True)]),
|
1195
1218
|
)
|
1196
|
-
score =
|
1197
|
-
|
1219
|
+
score = score_wrapper_sproc(
|
1220
|
+
session,
|
1198
1221
|
query,
|
1199
1222
|
stage_score_file_name,
|
1200
1223
|
identifier.get_unescaped_names(self.input_cols),
|
1201
1224
|
identifier.get_unescaped_names(self.label_cols),
|
1202
1225
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1203
|
-
statement_params
|
1226
|
+
statement_params,
|
1204
1227
|
)
|
1205
1228
|
|
1206
1229
|
cleanup_temp_files([local_score_file_name])
|
@@ -1218,18 +1241,20 @@ class LassoLarsIC(BaseTransformer):
|
|
1218
1241
|
if self._sklearn_object._estimator_type == 'classifier':
|
1219
1242
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1220
1243
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1221
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1244
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1245
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1222
1246
|
# For regressor, the type of predict is float64
|
1223
1247
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1224
1248
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1225
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1226
|
-
|
1249
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1250
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1227
1251
|
for prob_func in PROB_FUNCTIONS:
|
1228
1252
|
if hasattr(self, prob_func):
|
1229
1253
|
output_cols_prefix: str = f"{prob_func}_"
|
1230
1254
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1231
1255
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1232
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1256
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1257
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1233
1258
|
|
1234
1259
|
@property
|
1235
1260
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|