snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -226,7 +228,6 @@ class KernelRidge(BaseTransformer):
|
|
226
228
|
sample_weight_col: Optional[str] = None,
|
227
229
|
) -> None:
|
228
230
|
super().__init__()
|
229
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
230
231
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
231
232
|
|
232
233
|
self._deps = list(deps)
|
@@ -251,6 +252,15 @@ class KernelRidge(BaseTransformer):
|
|
251
252
|
self.set_drop_input_cols(drop_input_cols)
|
252
253
|
self.set_sample_weight_col(sample_weight_col)
|
253
254
|
|
255
|
+
def _get_rand_id(self) -> str:
|
256
|
+
"""
|
257
|
+
Generate random id to be used in sproc and stage names.
|
258
|
+
|
259
|
+
Returns:
|
260
|
+
Random id string usable in sproc, table, and stage names.
|
261
|
+
"""
|
262
|
+
return str(uuid4()).replace("-", "_").upper()
|
263
|
+
|
254
264
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
255
265
|
"""
|
256
266
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -329,7 +339,7 @@ class KernelRidge(BaseTransformer):
|
|
329
339
|
cp.dump(self._sklearn_object, local_transform_file)
|
330
340
|
|
331
341
|
# Create temp stage to run fit.
|
332
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
342
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
333
343
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
334
344
|
SqlResultValidator(
|
335
345
|
session=session,
|
@@ -342,11 +352,12 @@ class KernelRidge(BaseTransformer):
|
|
342
352
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
343
353
|
).validate()
|
344
354
|
|
345
|
-
|
355
|
+
# Use posixpath to construct stage paths
|
356
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
357
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
346
358
|
local_result_file_name = get_temp_file_path()
|
347
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
348
359
|
|
349
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
360
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
350
361
|
statement_params = telemetry.get_function_usage_statement_params(
|
351
362
|
project=_PROJECT,
|
352
363
|
subproject=_SUBPROJECT,
|
@@ -372,6 +383,7 @@ class KernelRidge(BaseTransformer):
|
|
372
383
|
replace=True,
|
373
384
|
session=session,
|
374
385
|
statement_params=statement_params,
|
386
|
+
anonymous=True
|
375
387
|
)
|
376
388
|
def fit_wrapper_sproc(
|
377
389
|
session: Session,
|
@@ -380,7 +392,8 @@ class KernelRidge(BaseTransformer):
|
|
380
392
|
stage_result_file_name: str,
|
381
393
|
input_cols: List[str],
|
382
394
|
label_cols: List[str],
|
383
|
-
sample_weight_col: Optional[str]
|
395
|
+
sample_weight_col: Optional[str],
|
396
|
+
statement_params: Dict[str, str]
|
384
397
|
) -> str:
|
385
398
|
import cloudpickle as cp
|
386
399
|
import numpy as np
|
@@ -447,15 +460,15 @@ class KernelRidge(BaseTransformer):
|
|
447
460
|
api_calls=[Session.call],
|
448
461
|
custom_tags=dict([("autogen", True)]),
|
449
462
|
)
|
450
|
-
sproc_export_file_name =
|
451
|
-
|
463
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
464
|
+
session,
|
452
465
|
query,
|
453
466
|
stage_transform_file_name,
|
454
467
|
stage_result_file_name,
|
455
468
|
identifier.get_unescaped_names(self.input_cols),
|
456
469
|
identifier.get_unescaped_names(self.label_cols),
|
457
470
|
identifier.get_unescaped_names(self.sample_weight_col),
|
458
|
-
statement_params
|
471
|
+
statement_params,
|
459
472
|
)
|
460
473
|
|
461
474
|
if "|" in sproc_export_file_name:
|
@@ -465,7 +478,7 @@ class KernelRidge(BaseTransformer):
|
|
465
478
|
print("\n".join(fields[1:]))
|
466
479
|
|
467
480
|
session.file.get(
|
468
|
-
|
481
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
469
482
|
local_result_file_name,
|
470
483
|
statement_params=statement_params
|
471
484
|
)
|
@@ -511,7 +524,7 @@ class KernelRidge(BaseTransformer):
|
|
511
524
|
|
512
525
|
# Register vectorized UDF for batch inference
|
513
526
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
514
|
-
safe_id=self.id, method=inference_method)
|
527
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
515
528
|
|
516
529
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
517
530
|
# will try to pickle all of self which fails.
|
@@ -603,7 +616,7 @@ class KernelRidge(BaseTransformer):
|
|
603
616
|
return transformed_pandas_df.to_dict("records")
|
604
617
|
|
605
618
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
606
|
-
safe_id=self.id
|
619
|
+
safe_id=self._get_rand_id()
|
607
620
|
)
|
608
621
|
|
609
622
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -770,11 +783,18 @@ class KernelRidge(BaseTransformer):
|
|
770
783
|
Transformed dataset.
|
771
784
|
"""
|
772
785
|
if isinstance(dataset, DataFrame):
|
786
|
+
expected_type_inferred = "float"
|
787
|
+
# when it is classifier, infer the datatype from label columns
|
788
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
789
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
790
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
791
|
+
)
|
792
|
+
|
773
793
|
output_df = self._batch_inference(
|
774
794
|
dataset=dataset,
|
775
795
|
inference_method="predict",
|
776
796
|
expected_output_cols_list=self.output_cols,
|
777
|
-
expected_output_cols_type=
|
797
|
+
expected_output_cols_type=expected_type_inferred,
|
778
798
|
)
|
779
799
|
elif isinstance(dataset, pd.DataFrame):
|
780
800
|
output_df = self._sklearn_inference(
|
@@ -845,10 +865,10 @@ class KernelRidge(BaseTransformer):
|
|
845
865
|
|
846
866
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
847
867
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
848
|
-
Returns
|
868
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
849
869
|
"""
|
850
870
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
851
|
-
return []
|
871
|
+
return [output_cols_prefix]
|
852
872
|
|
853
873
|
classes = self._sklearn_object.classes_
|
854
874
|
if isinstance(classes, numpy.ndarray):
|
@@ -1073,7 +1093,7 @@ class KernelRidge(BaseTransformer):
|
|
1073
1093
|
cp.dump(self._sklearn_object, local_score_file)
|
1074
1094
|
|
1075
1095
|
# Create temp stage to run score.
|
1076
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1096
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1077
1097
|
session = dataset._session
|
1078
1098
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1079
1099
|
SqlResultValidator(
|
@@ -1087,8 +1107,9 @@ class KernelRidge(BaseTransformer):
|
|
1087
1107
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1088
1108
|
).validate()
|
1089
1109
|
|
1090
|
-
|
1091
|
-
|
1110
|
+
# Use posixpath to construct stage paths
|
1111
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1112
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1092
1113
|
statement_params = telemetry.get_function_usage_statement_params(
|
1093
1114
|
project=_PROJECT,
|
1094
1115
|
subproject=_SUBPROJECT,
|
@@ -1114,6 +1135,7 @@ class KernelRidge(BaseTransformer):
|
|
1114
1135
|
replace=True,
|
1115
1136
|
session=session,
|
1116
1137
|
statement_params=statement_params,
|
1138
|
+
anonymous=True
|
1117
1139
|
)
|
1118
1140
|
def score_wrapper_sproc(
|
1119
1141
|
session: Session,
|
@@ -1121,7 +1143,8 @@ class KernelRidge(BaseTransformer):
|
|
1121
1143
|
stage_score_file_name: str,
|
1122
1144
|
input_cols: List[str],
|
1123
1145
|
label_cols: List[str],
|
1124
|
-
sample_weight_col: Optional[str]
|
1146
|
+
sample_weight_col: Optional[str],
|
1147
|
+
statement_params: Dict[str, str]
|
1125
1148
|
) -> float:
|
1126
1149
|
import cloudpickle as cp
|
1127
1150
|
import numpy as np
|
@@ -1171,14 +1194,14 @@ class KernelRidge(BaseTransformer):
|
|
1171
1194
|
api_calls=[Session.call],
|
1172
1195
|
custom_tags=dict([("autogen", True)]),
|
1173
1196
|
)
|
1174
|
-
score =
|
1175
|
-
|
1197
|
+
score = score_wrapper_sproc(
|
1198
|
+
session,
|
1176
1199
|
query,
|
1177
1200
|
stage_score_file_name,
|
1178
1201
|
identifier.get_unescaped_names(self.input_cols),
|
1179
1202
|
identifier.get_unescaped_names(self.label_cols),
|
1180
1203
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1181
|
-
statement_params
|
1204
|
+
statement_params,
|
1182
1205
|
)
|
1183
1206
|
|
1184
1207
|
cleanup_temp_files([local_score_file_name])
|
@@ -1196,18 +1219,20 @@ class KernelRidge(BaseTransformer):
|
|
1196
1219
|
if self._sklearn_object._estimator_type == 'classifier':
|
1197
1220
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1198
1221
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1199
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1222
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1223
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1200
1224
|
# For regressor, the type of predict is float64
|
1201
1225
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1202
1226
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1203
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1204
|
-
|
1227
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1228
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1205
1229
|
for prob_func in PROB_FUNCTIONS:
|
1206
1230
|
if hasattr(self, prob_func):
|
1207
1231
|
output_cols_prefix: str = f"{prob_func}_"
|
1208
1232
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1209
1233
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1210
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1234
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1235
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1211
1236
|
|
1212
1237
|
@property
|
1213
1238
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -26,6 +27,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
26
27
|
from snowflake.snowpark import DataFrame, Session
|
27
28
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
28
29
|
from snowflake.snowpark.types import PandasSeries
|
30
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
29
31
|
|
30
32
|
from snowflake.ml.model.model_signature import (
|
31
33
|
DataType,
|
@@ -200,7 +202,6 @@ class LGBMClassifier(BaseTransformer):
|
|
200
202
|
**kwargs,
|
201
203
|
) -> None:
|
202
204
|
super().__init__()
|
203
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
204
205
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'lightgbm=={lightgbm.__version__}', f'cloudpickle=={cp.__version__}'])
|
205
206
|
|
206
207
|
self._deps = list(deps)
|
@@ -240,6 +241,15 @@ class LGBMClassifier(BaseTransformer):
|
|
240
241
|
self.set_drop_input_cols(drop_input_cols)
|
241
242
|
self.set_sample_weight_col(sample_weight_col)
|
242
243
|
|
244
|
+
def _get_rand_id(self) -> str:
|
245
|
+
"""
|
246
|
+
Generate random id to be used in sproc and stage names.
|
247
|
+
|
248
|
+
Returns:
|
249
|
+
Random id string usable in sproc, table, and stage names.
|
250
|
+
"""
|
251
|
+
return str(uuid4()).replace("-", "_").upper()
|
252
|
+
|
243
253
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
244
254
|
"""
|
245
255
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -318,7 +328,7 @@ class LGBMClassifier(BaseTransformer):
|
|
318
328
|
cp.dump(self._sklearn_object, local_transform_file)
|
319
329
|
|
320
330
|
# Create temp stage to run fit.
|
321
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
331
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
322
332
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
323
333
|
SqlResultValidator(
|
324
334
|
session=session,
|
@@ -331,11 +341,12 @@ class LGBMClassifier(BaseTransformer):
|
|
331
341
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
332
342
|
).validate()
|
333
343
|
|
334
|
-
|
344
|
+
# Use posixpath to construct stage paths
|
345
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
346
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
335
347
|
local_result_file_name = get_temp_file_path()
|
336
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
337
348
|
|
338
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
349
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
339
350
|
statement_params = telemetry.get_function_usage_statement_params(
|
340
351
|
project=_PROJECT,
|
341
352
|
subproject=_SUBPROJECT,
|
@@ -361,6 +372,7 @@ class LGBMClassifier(BaseTransformer):
|
|
361
372
|
replace=True,
|
362
373
|
session=session,
|
363
374
|
statement_params=statement_params,
|
375
|
+
anonymous=True
|
364
376
|
)
|
365
377
|
def fit_wrapper_sproc(
|
366
378
|
session: Session,
|
@@ -369,7 +381,8 @@ class LGBMClassifier(BaseTransformer):
|
|
369
381
|
stage_result_file_name: str,
|
370
382
|
input_cols: List[str],
|
371
383
|
label_cols: List[str],
|
372
|
-
sample_weight_col: Optional[str]
|
384
|
+
sample_weight_col: Optional[str],
|
385
|
+
statement_params: Dict[str, str]
|
373
386
|
) -> str:
|
374
387
|
import cloudpickle as cp
|
375
388
|
import numpy as np
|
@@ -436,15 +449,15 @@ class LGBMClassifier(BaseTransformer):
|
|
436
449
|
api_calls=[Session.call],
|
437
450
|
custom_tags=dict([("autogen", True)]),
|
438
451
|
)
|
439
|
-
sproc_export_file_name =
|
440
|
-
|
452
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
453
|
+
session,
|
441
454
|
query,
|
442
455
|
stage_transform_file_name,
|
443
456
|
stage_result_file_name,
|
444
457
|
identifier.get_unescaped_names(self.input_cols),
|
445
458
|
identifier.get_unescaped_names(self.label_cols),
|
446
459
|
identifier.get_unescaped_names(self.sample_weight_col),
|
447
|
-
statement_params
|
460
|
+
statement_params,
|
448
461
|
)
|
449
462
|
|
450
463
|
if "|" in sproc_export_file_name:
|
@@ -454,7 +467,7 @@ class LGBMClassifier(BaseTransformer):
|
|
454
467
|
print("\n".join(fields[1:]))
|
455
468
|
|
456
469
|
session.file.get(
|
457
|
-
|
470
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
458
471
|
local_result_file_name,
|
459
472
|
statement_params=statement_params
|
460
473
|
)
|
@@ -500,7 +513,7 @@ class LGBMClassifier(BaseTransformer):
|
|
500
513
|
|
501
514
|
# Register vectorized UDF for batch inference
|
502
515
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
503
|
-
safe_id=self.id, method=inference_method)
|
516
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
504
517
|
|
505
518
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
506
519
|
# will try to pickle all of self which fails.
|
@@ -592,7 +605,7 @@ class LGBMClassifier(BaseTransformer):
|
|
592
605
|
return transformed_pandas_df.to_dict("records")
|
593
606
|
|
594
607
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
595
|
-
safe_id=self.id
|
608
|
+
safe_id=self._get_rand_id()
|
596
609
|
)
|
597
610
|
|
598
611
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -759,11 +772,18 @@ class LGBMClassifier(BaseTransformer):
|
|
759
772
|
Transformed dataset.
|
760
773
|
"""
|
761
774
|
if isinstance(dataset, DataFrame):
|
775
|
+
expected_type_inferred = ""
|
776
|
+
# when it is classifier, infer the datatype from label columns
|
777
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
778
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
779
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
780
|
+
)
|
781
|
+
|
762
782
|
output_df = self._batch_inference(
|
763
783
|
dataset=dataset,
|
764
784
|
inference_method="predict",
|
765
785
|
expected_output_cols_list=self.output_cols,
|
766
|
-
expected_output_cols_type=
|
786
|
+
expected_output_cols_type=expected_type_inferred,
|
767
787
|
)
|
768
788
|
elif isinstance(dataset, pd.DataFrame):
|
769
789
|
output_df = self._sklearn_inference(
|
@@ -834,10 +854,10 @@ class LGBMClassifier(BaseTransformer):
|
|
834
854
|
|
835
855
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
836
856
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
837
|
-
Returns
|
857
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
838
858
|
"""
|
839
859
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
840
|
-
return []
|
860
|
+
return [output_cols_prefix]
|
841
861
|
|
842
862
|
classes = self._sklearn_object.classes_
|
843
863
|
if isinstance(classes, numpy.ndarray):
|
@@ -1066,7 +1086,7 @@ class LGBMClassifier(BaseTransformer):
|
|
1066
1086
|
cp.dump(self._sklearn_object, local_score_file)
|
1067
1087
|
|
1068
1088
|
# Create temp stage to run score.
|
1069
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1089
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1070
1090
|
session = dataset._session
|
1071
1091
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1072
1092
|
SqlResultValidator(
|
@@ -1080,8 +1100,9 @@ class LGBMClassifier(BaseTransformer):
|
|
1080
1100
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1081
1101
|
).validate()
|
1082
1102
|
|
1083
|
-
|
1084
|
-
|
1103
|
+
# Use posixpath to construct stage paths
|
1104
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1105
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1085
1106
|
statement_params = telemetry.get_function_usage_statement_params(
|
1086
1107
|
project=_PROJECT,
|
1087
1108
|
subproject=_SUBPROJECT,
|
@@ -1107,6 +1128,7 @@ class LGBMClassifier(BaseTransformer):
|
|
1107
1128
|
replace=True,
|
1108
1129
|
session=session,
|
1109
1130
|
statement_params=statement_params,
|
1131
|
+
anonymous=True
|
1110
1132
|
)
|
1111
1133
|
def score_wrapper_sproc(
|
1112
1134
|
session: Session,
|
@@ -1114,7 +1136,8 @@ class LGBMClassifier(BaseTransformer):
|
|
1114
1136
|
stage_score_file_name: str,
|
1115
1137
|
input_cols: List[str],
|
1116
1138
|
label_cols: List[str],
|
1117
|
-
sample_weight_col: Optional[str]
|
1139
|
+
sample_weight_col: Optional[str],
|
1140
|
+
statement_params: Dict[str, str]
|
1118
1141
|
) -> float:
|
1119
1142
|
import cloudpickle as cp
|
1120
1143
|
import numpy as np
|
@@ -1164,14 +1187,14 @@ class LGBMClassifier(BaseTransformer):
|
|
1164
1187
|
api_calls=[Session.call],
|
1165
1188
|
custom_tags=dict([("autogen", True)]),
|
1166
1189
|
)
|
1167
|
-
score =
|
1168
|
-
|
1190
|
+
score = score_wrapper_sproc(
|
1191
|
+
session,
|
1169
1192
|
query,
|
1170
1193
|
stage_score_file_name,
|
1171
1194
|
identifier.get_unescaped_names(self.input_cols),
|
1172
1195
|
identifier.get_unescaped_names(self.label_cols),
|
1173
1196
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1174
|
-
statement_params
|
1197
|
+
statement_params,
|
1175
1198
|
)
|
1176
1199
|
|
1177
1200
|
cleanup_temp_files([local_score_file_name])
|
@@ -1189,18 +1212,20 @@ class LGBMClassifier(BaseTransformer):
|
|
1189
1212
|
if self._sklearn_object._estimator_type == 'classifier':
|
1190
1213
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1191
1214
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1192
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1215
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1216
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1193
1217
|
# For regressor, the type of predict is float64
|
1194
1218
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1195
1219
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1196
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1197
|
-
|
1220
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1221
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1198
1222
|
for prob_func in PROB_FUNCTIONS:
|
1199
1223
|
if hasattr(self, prob_func):
|
1200
1224
|
output_cols_prefix: str = f"{prob_func}_"
|
1201
1225
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1202
1226
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1203
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1227
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1228
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1204
1229
|
|
1205
1230
|
@property
|
1206
1231
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|