snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in the supported public registries. It is provided for informational purposes only.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
--- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py (1.0.2)
+++ snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py (1.0.3)
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -255,7 +257,6 @@ class MultiTaskLassoCV(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -286,6 +287,15 @@ class MultiTaskLassoCV(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -364,7 +374,7 @@ class MultiTaskLassoCV(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -377,11 +387,12 @@ class MultiTaskLassoCV(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -407,6 +418,7 @@ class MultiTaskLassoCV(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -415,7 +427,8 @@ class MultiTaskLassoCV(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -482,15 +495,15 @@ class MultiTaskLassoCV(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -500,7 +513,7 @@ class MultiTaskLassoCV(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -546,7 +559,7 @@ class MultiTaskLassoCV(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -638,7 +651,7 @@ class MultiTaskLassoCV(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -805,11 +818,18 @@ class MultiTaskLassoCV(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -880,10 +900,10 @@ class MultiTaskLassoCV(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1108,7 +1128,7 @@ class MultiTaskLassoCV(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1122,8 +1142,9 @@ class MultiTaskLassoCV(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1149,6 +1170,7 @@ class MultiTaskLassoCV(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1156,7 +1178,8 @@ class MultiTaskLassoCV(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1206,14 +1229,14 @@ class MultiTaskLassoCV(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1231,18 +1254,20 @@ class MultiTaskLassoCV(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                        ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
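The recurring edit above swaps `os.path.join` for `posixpath.join` when building Snowflake stage paths. A minimal standalone sketch of the motivation follows; the stage and file names are hypothetical and not taken from the package:

```python
# Stage paths must use forward slashes regardless of the client OS.
# os.path.join produces backslashes on Windows, which breaks the stage path;
# posixpath.join always joins with "/".
import ntpath      # the os.path implementation used on Windows, shown for comparison
import posixpath

stage_name = "SNOWML_TRANSFORM_ABC123"   # hypothetical temporary stage name
file_name = "model.pkl.zip"              # hypothetical serialized model file

print(ntpath.join(stage_name, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl.zip
print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl.zip
```

Using `ntpath` directly keeps the comparison deterministic on any platform.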
--- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py (1.0.2)
+++ snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py (1.0.3)
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -210,7 +212,6 @@ class OrthogonalMatchingPursuit(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -234,6 +235,15 @@ class OrthogonalMatchingPursuit(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -312,7 +322,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -325,11 +335,12 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -355,6 +366,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -363,7 +375,8 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -430,15 +443,15 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -448,7 +461,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -494,7 +507,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -586,7 +599,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -753,11 +766,18 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -828,10 +848,10 @@ class OrthogonalMatchingPursuit(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1056,7 +1076,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1070,8 +1090,9 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1097,6 +1118,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1104,7 +1126,8 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1154,14 +1177,14 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1179,18 +1202,20 @@ class OrthogonalMatchingPursuit(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                        ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
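The other change repeated in both files drops the per-instance `self.id` assigned once in `__init__` in favor of a `_get_rand_id()` helper that returns a fresh identifier on every call, so each fit, batch-inference, and score invocation gets its own temporary stage and sproc names. A minimal sketch of the pattern as it appears in the diff, written here as a free function rather than a method:

```python
# Fresh, SQL-identifier-safe random id per call: uuid4 with the hyphens replaced
# by underscores and upper-cased, so it can be embedded in stage/sproc/table names.
from uuid import uuid4

def _get_rand_id() -> str:
    """Generate a random id usable in sproc, table, and stage names."""
    return str(uuid4()).replace("-", "_").upper()

# Each call yields a distinct name, unlike a single id stored on the instance.
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id())
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=_get_rand_id())
print(transform_stage_name)
print(fit_sproc_name)
```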