snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -26,6 +27,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
26
27
|
from snowflake.snowpark import DataFrame, Session
|
27
28
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
28
29
|
from snowflake.snowpark.types import PandasSeries
|
30
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
29
31
|
|
30
32
|
from snowflake.ml.model.model_signature import (
|
31
33
|
DataType,
|
@@ -200,7 +202,6 @@ class LGBMRegressor(BaseTransformer):
|
|
200
202
|
**kwargs,
|
201
203
|
) -> None:
|
202
204
|
super().__init__()
|
203
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
204
205
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'lightgbm=={lightgbm.__version__}', f'cloudpickle=={cp.__version__}'])
|
205
206
|
|
206
207
|
self._deps = list(deps)
|
@@ -240,6 +241,15 @@ class LGBMRegressor(BaseTransformer):
|
|
240
241
|
self.set_drop_input_cols(drop_input_cols)
|
241
242
|
self.set_sample_weight_col(sample_weight_col)
|
242
243
|
|
244
|
+
def _get_rand_id(self) -> str:
|
245
|
+
"""
|
246
|
+
Generate random id to be used in sproc and stage names.
|
247
|
+
|
248
|
+
Returns:
|
249
|
+
Random id string usable in sproc, table, and stage names.
|
250
|
+
"""
|
251
|
+
return str(uuid4()).replace("-", "_").upper()
|
252
|
+
|
243
253
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
244
254
|
"""
|
245
255
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -318,7 +328,7 @@ class LGBMRegressor(BaseTransformer):
|
|
318
328
|
cp.dump(self._sklearn_object, local_transform_file)
|
319
329
|
|
320
330
|
# Create temp stage to run fit.
|
321
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
331
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
322
332
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
323
333
|
SqlResultValidator(
|
324
334
|
session=session,
|
@@ -331,11 +341,12 @@ class LGBMRegressor(BaseTransformer):
|
|
331
341
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
332
342
|
).validate()
|
333
343
|
|
334
|
-
|
344
|
+
# Use posixpath to construct stage paths
|
345
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
346
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
335
347
|
local_result_file_name = get_temp_file_path()
|
336
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
337
348
|
|
338
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
349
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
339
350
|
statement_params = telemetry.get_function_usage_statement_params(
|
340
351
|
project=_PROJECT,
|
341
352
|
subproject=_SUBPROJECT,
|
@@ -361,6 +372,7 @@ class LGBMRegressor(BaseTransformer):
|
|
361
372
|
replace=True,
|
362
373
|
session=session,
|
363
374
|
statement_params=statement_params,
|
375
|
+
anonymous=True
|
364
376
|
)
|
365
377
|
def fit_wrapper_sproc(
|
366
378
|
session: Session,
|
@@ -369,7 +381,8 @@ class LGBMRegressor(BaseTransformer):
|
|
369
381
|
stage_result_file_name: str,
|
370
382
|
input_cols: List[str],
|
371
383
|
label_cols: List[str],
|
372
|
-
sample_weight_col: Optional[str]
|
384
|
+
sample_weight_col: Optional[str],
|
385
|
+
statement_params: Dict[str, str]
|
373
386
|
) -> str:
|
374
387
|
import cloudpickle as cp
|
375
388
|
import numpy as np
|
@@ -436,15 +449,15 @@ class LGBMRegressor(BaseTransformer):
|
|
436
449
|
api_calls=[Session.call],
|
437
450
|
custom_tags=dict([("autogen", True)]),
|
438
451
|
)
|
439
|
-
sproc_export_file_name =
|
440
|
-
|
452
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
453
|
+
session,
|
441
454
|
query,
|
442
455
|
stage_transform_file_name,
|
443
456
|
stage_result_file_name,
|
444
457
|
identifier.get_unescaped_names(self.input_cols),
|
445
458
|
identifier.get_unescaped_names(self.label_cols),
|
446
459
|
identifier.get_unescaped_names(self.sample_weight_col),
|
447
|
-
statement_params
|
460
|
+
statement_params,
|
448
461
|
)
|
449
462
|
|
450
463
|
if "|" in sproc_export_file_name:
|
@@ -454,7 +467,7 @@ class LGBMRegressor(BaseTransformer):
|
|
454
467
|
print("\n".join(fields[1:]))
|
455
468
|
|
456
469
|
session.file.get(
|
457
|
-
|
470
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
458
471
|
local_result_file_name,
|
459
472
|
statement_params=statement_params
|
460
473
|
)
|
@@ -500,7 +513,7 @@ class LGBMRegressor(BaseTransformer):
|
|
500
513
|
|
501
514
|
# Register vectorized UDF for batch inference
|
502
515
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
503
|
-
safe_id=self.
|
516
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
504
517
|
|
505
518
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
506
519
|
# will try to pickle all of self which fails.
|
@@ -592,7 +605,7 @@ class LGBMRegressor(BaseTransformer):
|
|
592
605
|
return transformed_pandas_df.to_dict("records")
|
593
606
|
|
594
607
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
595
|
-
safe_id=self.
|
608
|
+
safe_id=self._get_rand_id()
|
596
609
|
)
|
597
610
|
|
598
611
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -759,11 +772,18 @@ class LGBMRegressor(BaseTransformer):
|
|
759
772
|
Transformed dataset.
|
760
773
|
"""
|
761
774
|
if isinstance(dataset, DataFrame):
|
775
|
+
expected_type_inferred = "float"
|
776
|
+
# when it is classifier, infer the datatype from label columns
|
777
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
778
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
779
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
780
|
+
)
|
781
|
+
|
762
782
|
output_df = self._batch_inference(
|
763
783
|
dataset=dataset,
|
764
784
|
inference_method="predict",
|
765
785
|
expected_output_cols_list=self.output_cols,
|
766
|
-
expected_output_cols_type=
|
786
|
+
expected_output_cols_type=expected_type_inferred,
|
767
787
|
)
|
768
788
|
elif isinstance(dataset, pd.DataFrame):
|
769
789
|
output_df = self._sklearn_inference(
|
@@ -834,10 +854,10 @@ class LGBMRegressor(BaseTransformer):
|
|
834
854
|
|
835
855
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
836
856
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
837
|
-
Returns
|
857
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
838
858
|
"""
|
839
859
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
840
|
-
return []
|
860
|
+
return [output_cols_prefix]
|
841
861
|
|
842
862
|
classes = self._sklearn_object.classes_
|
843
863
|
if isinstance(classes, numpy.ndarray):
|
@@ -1062,7 +1082,7 @@ class LGBMRegressor(BaseTransformer):
|
|
1062
1082
|
cp.dump(self._sklearn_object, local_score_file)
|
1063
1083
|
|
1064
1084
|
# Create temp stage to run score.
|
1065
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1085
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1066
1086
|
session = dataset._session
|
1067
1087
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1068
1088
|
SqlResultValidator(
|
@@ -1076,8 +1096,9 @@ class LGBMRegressor(BaseTransformer):
|
|
1076
1096
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1077
1097
|
).validate()
|
1078
1098
|
|
1079
|
-
|
1080
|
-
|
1099
|
+
# Use posixpath to construct stage paths
|
1100
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1101
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1081
1102
|
statement_params = telemetry.get_function_usage_statement_params(
|
1082
1103
|
project=_PROJECT,
|
1083
1104
|
subproject=_SUBPROJECT,
|
@@ -1103,6 +1124,7 @@ class LGBMRegressor(BaseTransformer):
|
|
1103
1124
|
replace=True,
|
1104
1125
|
session=session,
|
1105
1126
|
statement_params=statement_params,
|
1127
|
+
anonymous=True
|
1106
1128
|
)
|
1107
1129
|
def score_wrapper_sproc(
|
1108
1130
|
session: Session,
|
@@ -1110,7 +1132,8 @@ class LGBMRegressor(BaseTransformer):
|
|
1110
1132
|
stage_score_file_name: str,
|
1111
1133
|
input_cols: List[str],
|
1112
1134
|
label_cols: List[str],
|
1113
|
-
sample_weight_col: Optional[str]
|
1135
|
+
sample_weight_col: Optional[str],
|
1136
|
+
statement_params: Dict[str, str]
|
1114
1137
|
) -> float:
|
1115
1138
|
import cloudpickle as cp
|
1116
1139
|
import numpy as np
|
@@ -1160,14 +1183,14 @@ class LGBMRegressor(BaseTransformer):
|
|
1160
1183
|
api_calls=[Session.call],
|
1161
1184
|
custom_tags=dict([("autogen", True)]),
|
1162
1185
|
)
|
1163
|
-
score =
|
1164
|
-
|
1186
|
+
score = score_wrapper_sproc(
|
1187
|
+
session,
|
1165
1188
|
query,
|
1166
1189
|
stage_score_file_name,
|
1167
1190
|
identifier.get_unescaped_names(self.input_cols),
|
1168
1191
|
identifier.get_unescaped_names(self.label_cols),
|
1169
1192
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1170
|
-
statement_params
|
1193
|
+
statement_params,
|
1171
1194
|
)
|
1172
1195
|
|
1173
1196
|
cleanup_temp_files([local_score_file_name])
|
@@ -1185,18 +1208,20 @@ class LGBMRegressor(BaseTransformer):
|
|
1185
1208
|
if self._sklearn_object._estimator_type == 'classifier':
|
1186
1209
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1187
1210
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1188
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1211
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1212
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1189
1213
|
# For regressor, the type of predict is float64
|
1190
1214
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1191
1215
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1192
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1193
|
-
|
1216
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1217
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1194
1218
|
for prob_func in PROB_FUNCTIONS:
|
1195
1219
|
if hasattr(self, prob_func):
|
1196
1220
|
output_cols_prefix: str = f"{prob_func}_"
|
1197
1221
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1198
1222
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1199
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1223
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1224
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1200
1225
|
|
1201
1226
|
@property
|
1202
1227
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -230,7 +232,6 @@ class ARDRegression(BaseTransformer):
|
|
230
232
|
sample_weight_col: Optional[str] = None,
|
231
233
|
) -> None:
|
232
234
|
super().__init__()
|
233
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
234
235
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
235
236
|
|
236
237
|
self._deps = list(deps)
|
@@ -260,6 +261,15 @@ class ARDRegression(BaseTransformer):
|
|
260
261
|
self.set_drop_input_cols(drop_input_cols)
|
261
262
|
self.set_sample_weight_col(sample_weight_col)
|
262
263
|
|
264
|
+
def _get_rand_id(self) -> str:
|
265
|
+
"""
|
266
|
+
Generate random id to be used in sproc and stage names.
|
267
|
+
|
268
|
+
Returns:
|
269
|
+
Random id string usable in sproc, table, and stage names.
|
270
|
+
"""
|
271
|
+
return str(uuid4()).replace("-", "_").upper()
|
272
|
+
|
263
273
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
264
274
|
"""
|
265
275
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -338,7 +348,7 @@ class ARDRegression(BaseTransformer):
|
|
338
348
|
cp.dump(self._sklearn_object, local_transform_file)
|
339
349
|
|
340
350
|
# Create temp stage to run fit.
|
341
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
351
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
342
352
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
343
353
|
SqlResultValidator(
|
344
354
|
session=session,
|
@@ -351,11 +361,12 @@ class ARDRegression(BaseTransformer):
|
|
351
361
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
352
362
|
).validate()
|
353
363
|
|
354
|
-
|
364
|
+
# Use posixpath to construct stage paths
|
365
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
366
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
355
367
|
local_result_file_name = get_temp_file_path()
|
356
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
357
368
|
|
358
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
369
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
359
370
|
statement_params = telemetry.get_function_usage_statement_params(
|
360
371
|
project=_PROJECT,
|
361
372
|
subproject=_SUBPROJECT,
|
@@ -381,6 +392,7 @@ class ARDRegression(BaseTransformer):
|
|
381
392
|
replace=True,
|
382
393
|
session=session,
|
383
394
|
statement_params=statement_params,
|
395
|
+
anonymous=True
|
384
396
|
)
|
385
397
|
def fit_wrapper_sproc(
|
386
398
|
session: Session,
|
@@ -389,7 +401,8 @@ class ARDRegression(BaseTransformer):
|
|
389
401
|
stage_result_file_name: str,
|
390
402
|
input_cols: List[str],
|
391
403
|
label_cols: List[str],
|
392
|
-
sample_weight_col: Optional[str]
|
404
|
+
sample_weight_col: Optional[str],
|
405
|
+
statement_params: Dict[str, str]
|
393
406
|
) -> str:
|
394
407
|
import cloudpickle as cp
|
395
408
|
import numpy as np
|
@@ -456,15 +469,15 @@ class ARDRegression(BaseTransformer):
|
|
456
469
|
api_calls=[Session.call],
|
457
470
|
custom_tags=dict([("autogen", True)]),
|
458
471
|
)
|
459
|
-
sproc_export_file_name =
|
460
|
-
|
472
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
473
|
+
session,
|
461
474
|
query,
|
462
475
|
stage_transform_file_name,
|
463
476
|
stage_result_file_name,
|
464
477
|
identifier.get_unescaped_names(self.input_cols),
|
465
478
|
identifier.get_unescaped_names(self.label_cols),
|
466
479
|
identifier.get_unescaped_names(self.sample_weight_col),
|
467
|
-
statement_params
|
480
|
+
statement_params,
|
468
481
|
)
|
469
482
|
|
470
483
|
if "|" in sproc_export_file_name:
|
@@ -474,7 +487,7 @@ class ARDRegression(BaseTransformer):
|
|
474
487
|
print("\n".join(fields[1:]))
|
475
488
|
|
476
489
|
session.file.get(
|
477
|
-
|
490
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
478
491
|
local_result_file_name,
|
479
492
|
statement_params=statement_params
|
480
493
|
)
|
@@ -520,7 +533,7 @@ class ARDRegression(BaseTransformer):
|
|
520
533
|
|
521
534
|
# Register vectorized UDF for batch inference
|
522
535
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
523
|
-
safe_id=self.
|
536
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
524
537
|
|
525
538
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
526
539
|
# will try to pickle all of self which fails.
|
@@ -612,7 +625,7 @@ class ARDRegression(BaseTransformer):
|
|
612
625
|
return transformed_pandas_df.to_dict("records")
|
613
626
|
|
614
627
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
615
|
-
safe_id=self.
|
628
|
+
safe_id=self._get_rand_id()
|
616
629
|
)
|
617
630
|
|
618
631
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -779,11 +792,18 @@ class ARDRegression(BaseTransformer):
|
|
779
792
|
Transformed dataset.
|
780
793
|
"""
|
781
794
|
if isinstance(dataset, DataFrame):
|
795
|
+
expected_type_inferred = "float"
|
796
|
+
# when it is classifier, infer the datatype from label columns
|
797
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
798
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
799
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
800
|
+
)
|
801
|
+
|
782
802
|
output_df = self._batch_inference(
|
783
803
|
dataset=dataset,
|
784
804
|
inference_method="predict",
|
785
805
|
expected_output_cols_list=self.output_cols,
|
786
|
-
expected_output_cols_type=
|
806
|
+
expected_output_cols_type=expected_type_inferred,
|
787
807
|
)
|
788
808
|
elif isinstance(dataset, pd.DataFrame):
|
789
809
|
output_df = self._sklearn_inference(
|
@@ -854,10 +874,10 @@ class ARDRegression(BaseTransformer):
|
|
854
874
|
|
855
875
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
856
876
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
857
|
-
Returns
|
877
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
858
878
|
"""
|
859
879
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
860
|
-
return []
|
880
|
+
return [output_cols_prefix]
|
861
881
|
|
862
882
|
classes = self._sklearn_object.classes_
|
863
883
|
if isinstance(classes, numpy.ndarray):
|
@@ -1082,7 +1102,7 @@ class ARDRegression(BaseTransformer):
|
|
1082
1102
|
cp.dump(self._sklearn_object, local_score_file)
|
1083
1103
|
|
1084
1104
|
# Create temp stage to run score.
|
1085
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1105
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1086
1106
|
session = dataset._session
|
1087
1107
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1088
1108
|
SqlResultValidator(
|
@@ -1096,8 +1116,9 @@ class ARDRegression(BaseTransformer):
|
|
1096
1116
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1097
1117
|
).validate()
|
1098
1118
|
|
1099
|
-
|
1100
|
-
|
1119
|
+
# Use posixpath to construct stage paths
|
1120
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1121
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1101
1122
|
statement_params = telemetry.get_function_usage_statement_params(
|
1102
1123
|
project=_PROJECT,
|
1103
1124
|
subproject=_SUBPROJECT,
|
@@ -1123,6 +1144,7 @@ class ARDRegression(BaseTransformer):
|
|
1123
1144
|
replace=True,
|
1124
1145
|
session=session,
|
1125
1146
|
statement_params=statement_params,
|
1147
|
+
anonymous=True
|
1126
1148
|
)
|
1127
1149
|
def score_wrapper_sproc(
|
1128
1150
|
session: Session,
|
@@ -1130,7 +1152,8 @@ class ARDRegression(BaseTransformer):
|
|
1130
1152
|
stage_score_file_name: str,
|
1131
1153
|
input_cols: List[str],
|
1132
1154
|
label_cols: List[str],
|
1133
|
-
sample_weight_col: Optional[str]
|
1155
|
+
sample_weight_col: Optional[str],
|
1156
|
+
statement_params: Dict[str, str]
|
1134
1157
|
) -> float:
|
1135
1158
|
import cloudpickle as cp
|
1136
1159
|
import numpy as np
|
@@ -1180,14 +1203,14 @@ class ARDRegression(BaseTransformer):
|
|
1180
1203
|
api_calls=[Session.call],
|
1181
1204
|
custom_tags=dict([("autogen", True)]),
|
1182
1205
|
)
|
1183
|
-
score =
|
1184
|
-
|
1206
|
+
score = score_wrapper_sproc(
|
1207
|
+
session,
|
1185
1208
|
query,
|
1186
1209
|
stage_score_file_name,
|
1187
1210
|
identifier.get_unescaped_names(self.input_cols),
|
1188
1211
|
identifier.get_unescaped_names(self.label_cols),
|
1189
1212
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1190
|
-
statement_params
|
1213
|
+
statement_params,
|
1191
1214
|
)
|
1192
1215
|
|
1193
1216
|
cleanup_temp_files([local_score_file_name])
|
@@ -1205,18 +1228,20 @@ class ARDRegression(BaseTransformer):
|
|
1205
1228
|
if self._sklearn_object._estimator_type == 'classifier':
|
1206
1229
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1207
1230
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1208
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1231
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1232
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1209
1233
|
# For regressor, the type of predict is float64
|
1210
1234
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1211
1235
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1212
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1213
|
-
|
1236
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1237
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1214
1238
|
for prob_func in PROB_FUNCTIONS:
|
1215
1239
|
if hasattr(self, prob_func):
|
1216
1240
|
output_cols_prefix: str = f"{prob_func}_"
|
1217
1241
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1218
1242
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1219
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1243
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1244
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1220
1245
|
|
1221
1246
|
@property
|
1222
1247
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|