snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -214,7 +216,6 @@ class HuberRegressor(BaseTransformer):
|
|
214
216
|
sample_weight_col: Optional[str] = None,
|
215
217
|
) -> None:
|
216
218
|
super().__init__()
|
217
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
218
219
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
219
220
|
|
220
221
|
self._deps = list(deps)
|
@@ -239,6 +240,15 @@ class HuberRegressor(BaseTransformer):
|
|
239
240
|
self.set_drop_input_cols(drop_input_cols)
|
240
241
|
self.set_sample_weight_col(sample_weight_col)
|
241
242
|
|
243
|
+
def _get_rand_id(self) -> str:
|
244
|
+
"""
|
245
|
+
Generate random id to be used in sproc and stage names.
|
246
|
+
|
247
|
+
Returns:
|
248
|
+
Random id string usable in sproc, table, and stage names.
|
249
|
+
"""
|
250
|
+
return str(uuid4()).replace("-", "_").upper()
|
251
|
+
|
242
252
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
243
253
|
"""
|
244
254
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -317,7 +327,7 @@ class HuberRegressor(BaseTransformer):
|
|
317
327
|
cp.dump(self._sklearn_object, local_transform_file)
|
318
328
|
|
319
329
|
# Create temp stage to run fit.
|
320
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
330
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
321
331
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
322
332
|
SqlResultValidator(
|
323
333
|
session=session,
|
@@ -330,11 +340,12 @@ class HuberRegressor(BaseTransformer):
|
|
330
340
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
331
341
|
).validate()
|
332
342
|
|
333
|
-
|
343
|
+
# Use posixpath to construct stage paths
|
344
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
345
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
334
346
|
local_result_file_name = get_temp_file_path()
|
335
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
336
347
|
|
337
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
348
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
338
349
|
statement_params = telemetry.get_function_usage_statement_params(
|
339
350
|
project=_PROJECT,
|
340
351
|
subproject=_SUBPROJECT,
|
@@ -360,6 +371,7 @@ class HuberRegressor(BaseTransformer):
|
|
360
371
|
replace=True,
|
361
372
|
session=session,
|
362
373
|
statement_params=statement_params,
|
374
|
+
anonymous=True
|
363
375
|
)
|
364
376
|
def fit_wrapper_sproc(
|
365
377
|
session: Session,
|
@@ -368,7 +380,8 @@ class HuberRegressor(BaseTransformer):
|
|
368
380
|
stage_result_file_name: str,
|
369
381
|
input_cols: List[str],
|
370
382
|
label_cols: List[str],
|
371
|
-
sample_weight_col: Optional[str]
|
383
|
+
sample_weight_col: Optional[str],
|
384
|
+
statement_params: Dict[str, str]
|
372
385
|
) -> str:
|
373
386
|
import cloudpickle as cp
|
374
387
|
import numpy as np
|
@@ -435,15 +448,15 @@ class HuberRegressor(BaseTransformer):
|
|
435
448
|
api_calls=[Session.call],
|
436
449
|
custom_tags=dict([("autogen", True)]),
|
437
450
|
)
|
438
|
-
sproc_export_file_name =
|
439
|
-
|
451
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
452
|
+
session,
|
440
453
|
query,
|
441
454
|
stage_transform_file_name,
|
442
455
|
stage_result_file_name,
|
443
456
|
identifier.get_unescaped_names(self.input_cols),
|
444
457
|
identifier.get_unescaped_names(self.label_cols),
|
445
458
|
identifier.get_unescaped_names(self.sample_weight_col),
|
446
|
-
statement_params
|
459
|
+
statement_params,
|
447
460
|
)
|
448
461
|
|
449
462
|
if "|" in sproc_export_file_name:
|
@@ -453,7 +466,7 @@ class HuberRegressor(BaseTransformer):
|
|
453
466
|
print("\n".join(fields[1:]))
|
454
467
|
|
455
468
|
session.file.get(
|
456
|
-
|
469
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
457
470
|
local_result_file_name,
|
458
471
|
statement_params=statement_params
|
459
472
|
)
|
@@ -499,7 +512,7 @@ class HuberRegressor(BaseTransformer):
|
|
499
512
|
|
500
513
|
# Register vectorized UDF for batch inference
|
501
514
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
502
|
-
safe_id=self.
|
515
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
503
516
|
|
504
517
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
505
518
|
# will try to pickle all of self which fails.
|
@@ -591,7 +604,7 @@ class HuberRegressor(BaseTransformer):
|
|
591
604
|
return transformed_pandas_df.to_dict("records")
|
592
605
|
|
593
606
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
594
|
-
safe_id=self.
|
607
|
+
safe_id=self._get_rand_id()
|
595
608
|
)
|
596
609
|
|
597
610
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -758,11 +771,18 @@ class HuberRegressor(BaseTransformer):
|
|
758
771
|
Transformed dataset.
|
759
772
|
"""
|
760
773
|
if isinstance(dataset, DataFrame):
|
774
|
+
expected_type_inferred = "float"
|
775
|
+
# when it is classifier, infer the datatype from label columns
|
776
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
777
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
778
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
779
|
+
)
|
780
|
+
|
761
781
|
output_df = self._batch_inference(
|
762
782
|
dataset=dataset,
|
763
783
|
inference_method="predict",
|
764
784
|
expected_output_cols_list=self.output_cols,
|
765
|
-
expected_output_cols_type=
|
785
|
+
expected_output_cols_type=expected_type_inferred,
|
766
786
|
)
|
767
787
|
elif isinstance(dataset, pd.DataFrame):
|
768
788
|
output_df = self._sklearn_inference(
|
@@ -833,10 +853,10 @@ class HuberRegressor(BaseTransformer):
|
|
833
853
|
|
834
854
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
835
855
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
836
|
-
Returns
|
856
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
837
857
|
"""
|
838
858
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
839
|
-
return []
|
859
|
+
return [output_cols_prefix]
|
840
860
|
|
841
861
|
classes = self._sklearn_object.classes_
|
842
862
|
if isinstance(classes, numpy.ndarray):
|
@@ -1061,7 +1081,7 @@ class HuberRegressor(BaseTransformer):
|
|
1061
1081
|
cp.dump(self._sklearn_object, local_score_file)
|
1062
1082
|
|
1063
1083
|
# Create temp stage to run score.
|
1064
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1084
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1065
1085
|
session = dataset._session
|
1066
1086
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1067
1087
|
SqlResultValidator(
|
@@ -1075,8 +1095,9 @@ class HuberRegressor(BaseTransformer):
|
|
1075
1095
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1076
1096
|
).validate()
|
1077
1097
|
|
1078
|
-
|
1079
|
-
|
1098
|
+
# Use posixpath to construct stage paths
|
1099
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1100
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1080
1101
|
statement_params = telemetry.get_function_usage_statement_params(
|
1081
1102
|
project=_PROJECT,
|
1082
1103
|
subproject=_SUBPROJECT,
|
@@ -1102,6 +1123,7 @@ class HuberRegressor(BaseTransformer):
|
|
1102
1123
|
replace=True,
|
1103
1124
|
session=session,
|
1104
1125
|
statement_params=statement_params,
|
1126
|
+
anonymous=True
|
1105
1127
|
)
|
1106
1128
|
def score_wrapper_sproc(
|
1107
1129
|
session: Session,
|
@@ -1109,7 +1131,8 @@ class HuberRegressor(BaseTransformer):
|
|
1109
1131
|
stage_score_file_name: str,
|
1110
1132
|
input_cols: List[str],
|
1111
1133
|
label_cols: List[str],
|
1112
|
-
sample_weight_col: Optional[str]
|
1134
|
+
sample_weight_col: Optional[str],
|
1135
|
+
statement_params: Dict[str, str]
|
1113
1136
|
) -> float:
|
1114
1137
|
import cloudpickle as cp
|
1115
1138
|
import numpy as np
|
@@ -1159,14 +1182,14 @@ class HuberRegressor(BaseTransformer):
|
|
1159
1182
|
api_calls=[Session.call],
|
1160
1183
|
custom_tags=dict([("autogen", True)]),
|
1161
1184
|
)
|
1162
|
-
score =
|
1163
|
-
|
1185
|
+
score = score_wrapper_sproc(
|
1186
|
+
session,
|
1164
1187
|
query,
|
1165
1188
|
stage_score_file_name,
|
1166
1189
|
identifier.get_unescaped_names(self.input_cols),
|
1167
1190
|
identifier.get_unescaped_names(self.label_cols),
|
1168
1191
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1169
|
-
statement_params
|
1192
|
+
statement_params,
|
1170
1193
|
)
|
1171
1194
|
|
1172
1195
|
cleanup_temp_files([local_score_file_name])
|
@@ -1184,18 +1207,20 @@ class HuberRegressor(BaseTransformer):
|
|
1184
1207
|
if self._sklearn_object._estimator_type == 'classifier':
|
1185
1208
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1186
1209
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1187
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1210
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1211
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1188
1212
|
# For regressor, the type of predict is float64
|
1189
1213
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1190
1214
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1191
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1192
|
-
|
1215
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1216
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1193
1217
|
for prob_func in PROB_FUNCTIONS:
|
1194
1218
|
if hasattr(self, prob_func):
|
1195
1219
|
output_cols_prefix: str = f"{prob_func}_"
|
1196
1220
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1197
1221
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1198
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1222
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1223
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1199
1224
|
|
1200
1225
|
@property
|
1201
1226
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -239,7 +241,6 @@ class Lars(BaseTransformer):
|
|
239
241
|
sample_weight_col: Optional[str] = None,
|
240
242
|
) -> None:
|
241
243
|
super().__init__()
|
242
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
243
244
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
244
245
|
|
245
246
|
self._deps = list(deps)
|
@@ -268,6 +269,15 @@ class Lars(BaseTransformer):
|
|
268
269
|
self.set_drop_input_cols(drop_input_cols)
|
269
270
|
self.set_sample_weight_col(sample_weight_col)
|
270
271
|
|
272
|
+
def _get_rand_id(self) -> str:
|
273
|
+
"""
|
274
|
+
Generate random id to be used in sproc and stage names.
|
275
|
+
|
276
|
+
Returns:
|
277
|
+
Random id string usable in sproc, table, and stage names.
|
278
|
+
"""
|
279
|
+
return str(uuid4()).replace("-", "_").upper()
|
280
|
+
|
271
281
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
272
282
|
"""
|
273
283
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -346,7 +356,7 @@ class Lars(BaseTransformer):
|
|
346
356
|
cp.dump(self._sklearn_object, local_transform_file)
|
347
357
|
|
348
358
|
# Create temp stage to run fit.
|
349
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
359
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
350
360
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
351
361
|
SqlResultValidator(
|
352
362
|
session=session,
|
@@ -359,11 +369,12 @@ class Lars(BaseTransformer):
|
|
359
369
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
360
370
|
).validate()
|
361
371
|
|
362
|
-
|
372
|
+
# Use posixpath to construct stage paths
|
373
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
374
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
363
375
|
local_result_file_name = get_temp_file_path()
|
364
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
365
376
|
|
366
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
377
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
367
378
|
statement_params = telemetry.get_function_usage_statement_params(
|
368
379
|
project=_PROJECT,
|
369
380
|
subproject=_SUBPROJECT,
|
@@ -389,6 +400,7 @@ class Lars(BaseTransformer):
|
|
389
400
|
replace=True,
|
390
401
|
session=session,
|
391
402
|
statement_params=statement_params,
|
403
|
+
anonymous=True
|
392
404
|
)
|
393
405
|
def fit_wrapper_sproc(
|
394
406
|
session: Session,
|
@@ -397,7 +409,8 @@ class Lars(BaseTransformer):
|
|
397
409
|
stage_result_file_name: str,
|
398
410
|
input_cols: List[str],
|
399
411
|
label_cols: List[str],
|
400
|
-
sample_weight_col: Optional[str]
|
412
|
+
sample_weight_col: Optional[str],
|
413
|
+
statement_params: Dict[str, str]
|
401
414
|
) -> str:
|
402
415
|
import cloudpickle as cp
|
403
416
|
import numpy as np
|
@@ -464,15 +477,15 @@ class Lars(BaseTransformer):
|
|
464
477
|
api_calls=[Session.call],
|
465
478
|
custom_tags=dict([("autogen", True)]),
|
466
479
|
)
|
467
|
-
sproc_export_file_name =
|
468
|
-
|
480
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
481
|
+
session,
|
469
482
|
query,
|
470
483
|
stage_transform_file_name,
|
471
484
|
stage_result_file_name,
|
472
485
|
identifier.get_unescaped_names(self.input_cols),
|
473
486
|
identifier.get_unescaped_names(self.label_cols),
|
474
487
|
identifier.get_unescaped_names(self.sample_weight_col),
|
475
|
-
statement_params
|
488
|
+
statement_params,
|
476
489
|
)
|
477
490
|
|
478
491
|
if "|" in sproc_export_file_name:
|
@@ -482,7 +495,7 @@ class Lars(BaseTransformer):
|
|
482
495
|
print("\n".join(fields[1:]))
|
483
496
|
|
484
497
|
session.file.get(
|
485
|
-
|
498
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
486
499
|
local_result_file_name,
|
487
500
|
statement_params=statement_params
|
488
501
|
)
|
@@ -528,7 +541,7 @@ class Lars(BaseTransformer):
|
|
528
541
|
|
529
542
|
# Register vectorized UDF for batch inference
|
530
543
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
531
|
-
safe_id=self.
|
544
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
532
545
|
|
533
546
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
534
547
|
# will try to pickle all of self which fails.
|
@@ -620,7 +633,7 @@ class Lars(BaseTransformer):
|
|
620
633
|
return transformed_pandas_df.to_dict("records")
|
621
634
|
|
622
635
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
623
|
-
safe_id=self.
|
636
|
+
safe_id=self._get_rand_id()
|
624
637
|
)
|
625
638
|
|
626
639
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -787,11 +800,18 @@ class Lars(BaseTransformer):
|
|
787
800
|
Transformed dataset.
|
788
801
|
"""
|
789
802
|
if isinstance(dataset, DataFrame):
|
803
|
+
expected_type_inferred = "float"
|
804
|
+
# when it is classifier, infer the datatype from label columns
|
805
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
806
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
807
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
808
|
+
)
|
809
|
+
|
790
810
|
output_df = self._batch_inference(
|
791
811
|
dataset=dataset,
|
792
812
|
inference_method="predict",
|
793
813
|
expected_output_cols_list=self.output_cols,
|
794
|
-
expected_output_cols_type=
|
814
|
+
expected_output_cols_type=expected_type_inferred,
|
795
815
|
)
|
796
816
|
elif isinstance(dataset, pd.DataFrame):
|
797
817
|
output_df = self._sklearn_inference(
|
@@ -862,10 +882,10 @@ class Lars(BaseTransformer):
|
|
862
882
|
|
863
883
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
864
884
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
865
|
-
Returns
|
885
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
866
886
|
"""
|
867
887
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
868
|
-
return []
|
888
|
+
return [output_cols_prefix]
|
869
889
|
|
870
890
|
classes = self._sklearn_object.classes_
|
871
891
|
if isinstance(classes, numpy.ndarray):
|
@@ -1090,7 +1110,7 @@ class Lars(BaseTransformer):
|
|
1090
1110
|
cp.dump(self._sklearn_object, local_score_file)
|
1091
1111
|
|
1092
1112
|
# Create temp stage to run score.
|
1093
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1113
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1094
1114
|
session = dataset._session
|
1095
1115
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1096
1116
|
SqlResultValidator(
|
@@ -1104,8 +1124,9 @@ class Lars(BaseTransformer):
|
|
1104
1124
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1105
1125
|
).validate()
|
1106
1126
|
|
1107
|
-
|
1108
|
-
|
1127
|
+
# Use posixpath to construct stage paths
|
1128
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1129
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1109
1130
|
statement_params = telemetry.get_function_usage_statement_params(
|
1110
1131
|
project=_PROJECT,
|
1111
1132
|
subproject=_SUBPROJECT,
|
@@ -1131,6 +1152,7 @@ class Lars(BaseTransformer):
|
|
1131
1152
|
replace=True,
|
1132
1153
|
session=session,
|
1133
1154
|
statement_params=statement_params,
|
1155
|
+
anonymous=True
|
1134
1156
|
)
|
1135
1157
|
def score_wrapper_sproc(
|
1136
1158
|
session: Session,
|
@@ -1138,7 +1160,8 @@ class Lars(BaseTransformer):
|
|
1138
1160
|
stage_score_file_name: str,
|
1139
1161
|
input_cols: List[str],
|
1140
1162
|
label_cols: List[str],
|
1141
|
-
sample_weight_col: Optional[str]
|
1163
|
+
sample_weight_col: Optional[str],
|
1164
|
+
statement_params: Dict[str, str]
|
1142
1165
|
) -> float:
|
1143
1166
|
import cloudpickle as cp
|
1144
1167
|
import numpy as np
|
@@ -1188,14 +1211,14 @@ class Lars(BaseTransformer):
|
|
1188
1211
|
api_calls=[Session.call],
|
1189
1212
|
custom_tags=dict([("autogen", True)]),
|
1190
1213
|
)
|
1191
|
-
score =
|
1192
|
-
|
1214
|
+
score = score_wrapper_sproc(
|
1215
|
+
session,
|
1193
1216
|
query,
|
1194
1217
|
stage_score_file_name,
|
1195
1218
|
identifier.get_unescaped_names(self.input_cols),
|
1196
1219
|
identifier.get_unescaped_names(self.label_cols),
|
1197
1220
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1198
|
-
statement_params
|
1221
|
+
statement_params,
|
1199
1222
|
)
|
1200
1223
|
|
1201
1224
|
cleanup_temp_files([local_score_file_name])
|
@@ -1213,18 +1236,20 @@ class Lars(BaseTransformer):
|
|
1213
1236
|
if self._sklearn_object._estimator_type == 'classifier':
|
1214
1237
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1215
1238
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1216
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1239
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1240
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1217
1241
|
# For regressor, the type of predict is float64
|
1218
1242
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1219
1243
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1220
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1221
|
-
|
1244
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1245
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1222
1246
|
for prob_func in PROB_FUNCTIONS:
|
1223
1247
|
if hasattr(self, prob_func):
|
1224
1248
|
output_cols_prefix: str = f"{prob_func}_"
|
1225
1249
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1226
1250
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1227
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1251
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1252
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1228
1253
|
|
1229
1254
|
@property
|
1230
1255
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|