snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -277,7 +279,6 @@ class ElasticNetCV(BaseTransformer):
|
|
277
279
|
sample_weight_col: Optional[str] = None,
|
278
280
|
) -> None:
|
279
281
|
super().__init__()
|
280
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
281
282
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
282
283
|
|
283
284
|
self._deps = list(deps)
|
@@ -311,6 +312,15 @@ class ElasticNetCV(BaseTransformer):
|
|
311
312
|
self.set_drop_input_cols(drop_input_cols)
|
312
313
|
self.set_sample_weight_col(sample_weight_col)
|
313
314
|
|
315
|
+
def _get_rand_id(self) -> str:
|
316
|
+
"""
|
317
|
+
Generate random id to be used in sproc and stage names.
|
318
|
+
|
319
|
+
Returns:
|
320
|
+
Random id string usable in sproc, table, and stage names.
|
321
|
+
"""
|
322
|
+
return str(uuid4()).replace("-", "_").upper()
|
323
|
+
|
314
324
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
315
325
|
"""
|
316
326
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -389,7 +399,7 @@ class ElasticNetCV(BaseTransformer):
|
|
389
399
|
cp.dump(self._sklearn_object, local_transform_file)
|
390
400
|
|
391
401
|
# Create temp stage to run fit.
|
392
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
402
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
393
403
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
394
404
|
SqlResultValidator(
|
395
405
|
session=session,
|
@@ -402,11 +412,12 @@ class ElasticNetCV(BaseTransformer):
|
|
402
412
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
403
413
|
).validate()
|
404
414
|
|
405
|
-
|
415
|
+
# Use posixpath to construct stage paths
|
416
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
417
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
406
418
|
local_result_file_name = get_temp_file_path()
|
407
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
408
419
|
|
409
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
420
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
410
421
|
statement_params = telemetry.get_function_usage_statement_params(
|
411
422
|
project=_PROJECT,
|
412
423
|
subproject=_SUBPROJECT,
|
@@ -432,6 +443,7 @@ class ElasticNetCV(BaseTransformer):
|
|
432
443
|
replace=True,
|
433
444
|
session=session,
|
434
445
|
statement_params=statement_params,
|
446
|
+
anonymous=True
|
435
447
|
)
|
436
448
|
def fit_wrapper_sproc(
|
437
449
|
session: Session,
|
@@ -440,7 +452,8 @@ class ElasticNetCV(BaseTransformer):
|
|
440
452
|
stage_result_file_name: str,
|
441
453
|
input_cols: List[str],
|
442
454
|
label_cols: List[str],
|
443
|
-
sample_weight_col: Optional[str]
|
455
|
+
sample_weight_col: Optional[str],
|
456
|
+
statement_params: Dict[str, str]
|
444
457
|
) -> str:
|
445
458
|
import cloudpickle as cp
|
446
459
|
import numpy as np
|
@@ -507,15 +520,15 @@ class ElasticNetCV(BaseTransformer):
|
|
507
520
|
api_calls=[Session.call],
|
508
521
|
custom_tags=dict([("autogen", True)]),
|
509
522
|
)
|
510
|
-
sproc_export_file_name =
|
511
|
-
|
523
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
524
|
+
session,
|
512
525
|
query,
|
513
526
|
stage_transform_file_name,
|
514
527
|
stage_result_file_name,
|
515
528
|
identifier.get_unescaped_names(self.input_cols),
|
516
529
|
identifier.get_unescaped_names(self.label_cols),
|
517
530
|
identifier.get_unescaped_names(self.sample_weight_col),
|
518
|
-
statement_params
|
531
|
+
statement_params,
|
519
532
|
)
|
520
533
|
|
521
534
|
if "|" in sproc_export_file_name:
|
@@ -525,7 +538,7 @@ class ElasticNetCV(BaseTransformer):
|
|
525
538
|
print("\n".join(fields[1:]))
|
526
539
|
|
527
540
|
session.file.get(
|
528
|
-
|
541
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
529
542
|
local_result_file_name,
|
530
543
|
statement_params=statement_params
|
531
544
|
)
|
@@ -571,7 +584,7 @@ class ElasticNetCV(BaseTransformer):
|
|
571
584
|
|
572
585
|
# Register vectorized UDF for batch inference
|
573
586
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
574
|
-
safe_id=self.
|
587
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
575
588
|
|
576
589
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
577
590
|
# will try to pickle all of self which fails.
|
@@ -663,7 +676,7 @@ class ElasticNetCV(BaseTransformer):
|
|
663
676
|
return transformed_pandas_df.to_dict("records")
|
664
677
|
|
665
678
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
666
|
-
safe_id=self.
|
679
|
+
safe_id=self._get_rand_id()
|
667
680
|
)
|
668
681
|
|
669
682
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -830,11 +843,18 @@ class ElasticNetCV(BaseTransformer):
|
|
830
843
|
Transformed dataset.
|
831
844
|
"""
|
832
845
|
if isinstance(dataset, DataFrame):
|
846
|
+
expected_type_inferred = "float"
|
847
|
+
# when it is classifier, infer the datatype from label columns
|
848
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
849
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
850
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
851
|
+
)
|
852
|
+
|
833
853
|
output_df = self._batch_inference(
|
834
854
|
dataset=dataset,
|
835
855
|
inference_method="predict",
|
836
856
|
expected_output_cols_list=self.output_cols,
|
837
|
-
expected_output_cols_type=
|
857
|
+
expected_output_cols_type=expected_type_inferred,
|
838
858
|
)
|
839
859
|
elif isinstance(dataset, pd.DataFrame):
|
840
860
|
output_df = self._sklearn_inference(
|
@@ -905,10 +925,10 @@ class ElasticNetCV(BaseTransformer):
|
|
905
925
|
|
906
926
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
907
927
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
908
|
-
Returns
|
928
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
909
929
|
"""
|
910
930
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
911
|
-
return []
|
931
|
+
return [output_cols_prefix]
|
912
932
|
|
913
933
|
classes = self._sklearn_object.classes_
|
914
934
|
if isinstance(classes, numpy.ndarray):
|
@@ -1133,7 +1153,7 @@ class ElasticNetCV(BaseTransformer):
|
|
1133
1153
|
cp.dump(self._sklearn_object, local_score_file)
|
1134
1154
|
|
1135
1155
|
# Create temp stage to run score.
|
1136
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1156
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1137
1157
|
session = dataset._session
|
1138
1158
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1139
1159
|
SqlResultValidator(
|
@@ -1147,8 +1167,9 @@ class ElasticNetCV(BaseTransformer):
|
|
1147
1167
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1148
1168
|
).validate()
|
1149
1169
|
|
1150
|
-
|
1151
|
-
|
1170
|
+
# Use posixpath to construct stage paths
|
1171
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1172
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1152
1173
|
statement_params = telemetry.get_function_usage_statement_params(
|
1153
1174
|
project=_PROJECT,
|
1154
1175
|
subproject=_SUBPROJECT,
|
@@ -1174,6 +1195,7 @@ class ElasticNetCV(BaseTransformer):
|
|
1174
1195
|
replace=True,
|
1175
1196
|
session=session,
|
1176
1197
|
statement_params=statement_params,
|
1198
|
+
anonymous=True
|
1177
1199
|
)
|
1178
1200
|
def score_wrapper_sproc(
|
1179
1201
|
session: Session,
|
@@ -1181,7 +1203,8 @@ class ElasticNetCV(BaseTransformer):
|
|
1181
1203
|
stage_score_file_name: str,
|
1182
1204
|
input_cols: List[str],
|
1183
1205
|
label_cols: List[str],
|
1184
|
-
sample_weight_col: Optional[str]
|
1206
|
+
sample_weight_col: Optional[str],
|
1207
|
+
statement_params: Dict[str, str]
|
1185
1208
|
) -> float:
|
1186
1209
|
import cloudpickle as cp
|
1187
1210
|
import numpy as np
|
@@ -1231,14 +1254,14 @@ class ElasticNetCV(BaseTransformer):
|
|
1231
1254
|
api_calls=[Session.call],
|
1232
1255
|
custom_tags=dict([("autogen", True)]),
|
1233
1256
|
)
|
1234
|
-
score =
|
1235
|
-
|
1257
|
+
score = score_wrapper_sproc(
|
1258
|
+
session,
|
1236
1259
|
query,
|
1237
1260
|
stage_score_file_name,
|
1238
1261
|
identifier.get_unescaped_names(self.input_cols),
|
1239
1262
|
identifier.get_unescaped_names(self.label_cols),
|
1240
1263
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1241
|
-
statement_params
|
1264
|
+
statement_params,
|
1242
1265
|
)
|
1243
1266
|
|
1244
1267
|
cleanup_temp_files([local_score_file_name])
|
@@ -1256,18 +1279,20 @@ class ElasticNetCV(BaseTransformer):
|
|
1256
1279
|
if self._sklearn_object._estimator_type == 'classifier':
|
1257
1280
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1258
1281
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1259
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1282
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1283
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1260
1284
|
# For regressor, the type of predict is float64
|
1261
1285
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1262
1286
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1263
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1264
|
-
|
1287
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1288
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1265
1289
|
for prob_func in PROB_FUNCTIONS:
|
1266
1290
|
if hasattr(self, prob_func):
|
1267
1291
|
output_cols_prefix: str = f"{prob_func}_"
|
1268
1292
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1269
1293
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1270
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1294
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1295
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1271
1296
|
|
1272
1297
|
@property
|
1273
1298
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -230,7 +232,6 @@ class GammaRegressor(BaseTransformer):
|
|
230
232
|
sample_weight_col: Optional[str] = None,
|
231
233
|
) -> None:
|
232
234
|
super().__init__()
|
233
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
234
235
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
235
236
|
|
236
237
|
self._deps = list(deps)
|
@@ -256,6 +257,15 @@ class GammaRegressor(BaseTransformer):
|
|
256
257
|
self.set_drop_input_cols(drop_input_cols)
|
257
258
|
self.set_sample_weight_col(sample_weight_col)
|
258
259
|
|
260
|
+
def _get_rand_id(self) -> str:
|
261
|
+
"""
|
262
|
+
Generate random id to be used in sproc and stage names.
|
263
|
+
|
264
|
+
Returns:
|
265
|
+
Random id string usable in sproc, table, and stage names.
|
266
|
+
"""
|
267
|
+
return str(uuid4()).replace("-", "_").upper()
|
268
|
+
|
259
269
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
260
270
|
"""
|
261
271
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -334,7 +344,7 @@ class GammaRegressor(BaseTransformer):
|
|
334
344
|
cp.dump(self._sklearn_object, local_transform_file)
|
335
345
|
|
336
346
|
# Create temp stage to run fit.
|
337
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
347
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
338
348
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
339
349
|
SqlResultValidator(
|
340
350
|
session=session,
|
@@ -347,11 +357,12 @@ class GammaRegressor(BaseTransformer):
|
|
347
357
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
348
358
|
).validate()
|
349
359
|
|
350
|
-
|
360
|
+
# Use posixpath to construct stage paths
|
361
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
362
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
351
363
|
local_result_file_name = get_temp_file_path()
|
352
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
353
364
|
|
354
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
365
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
355
366
|
statement_params = telemetry.get_function_usage_statement_params(
|
356
367
|
project=_PROJECT,
|
357
368
|
subproject=_SUBPROJECT,
|
@@ -377,6 +388,7 @@ class GammaRegressor(BaseTransformer):
|
|
377
388
|
replace=True,
|
378
389
|
session=session,
|
379
390
|
statement_params=statement_params,
|
391
|
+
anonymous=True
|
380
392
|
)
|
381
393
|
def fit_wrapper_sproc(
|
382
394
|
session: Session,
|
@@ -385,7 +397,8 @@ class GammaRegressor(BaseTransformer):
|
|
385
397
|
stage_result_file_name: str,
|
386
398
|
input_cols: List[str],
|
387
399
|
label_cols: List[str],
|
388
|
-
sample_weight_col: Optional[str]
|
400
|
+
sample_weight_col: Optional[str],
|
401
|
+
statement_params: Dict[str, str]
|
389
402
|
) -> str:
|
390
403
|
import cloudpickle as cp
|
391
404
|
import numpy as np
|
@@ -452,15 +465,15 @@ class GammaRegressor(BaseTransformer):
|
|
452
465
|
api_calls=[Session.call],
|
453
466
|
custom_tags=dict([("autogen", True)]),
|
454
467
|
)
|
455
|
-
sproc_export_file_name =
|
456
|
-
|
468
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
469
|
+
session,
|
457
470
|
query,
|
458
471
|
stage_transform_file_name,
|
459
472
|
stage_result_file_name,
|
460
473
|
identifier.get_unescaped_names(self.input_cols),
|
461
474
|
identifier.get_unescaped_names(self.label_cols),
|
462
475
|
identifier.get_unescaped_names(self.sample_weight_col),
|
463
|
-
statement_params
|
476
|
+
statement_params,
|
464
477
|
)
|
465
478
|
|
466
479
|
if "|" in sproc_export_file_name:
|
@@ -470,7 +483,7 @@ class GammaRegressor(BaseTransformer):
|
|
470
483
|
print("\n".join(fields[1:]))
|
471
484
|
|
472
485
|
session.file.get(
|
473
|
-
|
486
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
474
487
|
local_result_file_name,
|
475
488
|
statement_params=statement_params
|
476
489
|
)
|
@@ -516,7 +529,7 @@ class GammaRegressor(BaseTransformer):
|
|
516
529
|
|
517
530
|
# Register vectorized UDF for batch inference
|
518
531
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
519
|
-
safe_id=self.
|
532
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
520
533
|
|
521
534
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
522
535
|
# will try to pickle all of self which fails.
|
@@ -608,7 +621,7 @@ class GammaRegressor(BaseTransformer):
|
|
608
621
|
return transformed_pandas_df.to_dict("records")
|
609
622
|
|
610
623
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
611
|
-
safe_id=self.
|
624
|
+
safe_id=self._get_rand_id()
|
612
625
|
)
|
613
626
|
|
614
627
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -775,11 +788,18 @@ class GammaRegressor(BaseTransformer):
|
|
775
788
|
Transformed dataset.
|
776
789
|
"""
|
777
790
|
if isinstance(dataset, DataFrame):
|
791
|
+
expected_type_inferred = "float"
|
792
|
+
# when it is classifier, infer the datatype from label columns
|
793
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
794
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
795
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
796
|
+
)
|
797
|
+
|
778
798
|
output_df = self._batch_inference(
|
779
799
|
dataset=dataset,
|
780
800
|
inference_method="predict",
|
781
801
|
expected_output_cols_list=self.output_cols,
|
782
|
-
expected_output_cols_type=
|
802
|
+
expected_output_cols_type=expected_type_inferred,
|
783
803
|
)
|
784
804
|
elif isinstance(dataset, pd.DataFrame):
|
785
805
|
output_df = self._sklearn_inference(
|
@@ -850,10 +870,10 @@ class GammaRegressor(BaseTransformer):
|
|
850
870
|
|
851
871
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
852
872
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
853
|
-
Returns
|
873
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
854
874
|
"""
|
855
875
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
856
|
-
return []
|
876
|
+
return [output_cols_prefix]
|
857
877
|
|
858
878
|
classes = self._sklearn_object.classes_
|
859
879
|
if isinstance(classes, numpy.ndarray):
|
@@ -1078,7 +1098,7 @@ class GammaRegressor(BaseTransformer):
|
|
1078
1098
|
cp.dump(self._sklearn_object, local_score_file)
|
1079
1099
|
|
1080
1100
|
# Create temp stage to run score.
|
1081
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1101
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1082
1102
|
session = dataset._session
|
1083
1103
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1084
1104
|
SqlResultValidator(
|
@@ -1092,8 +1112,9 @@ class GammaRegressor(BaseTransformer):
|
|
1092
1112
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1093
1113
|
).validate()
|
1094
1114
|
|
1095
|
-
|
1096
|
-
|
1115
|
+
# Use posixpath to construct stage paths
|
1116
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1117
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1097
1118
|
statement_params = telemetry.get_function_usage_statement_params(
|
1098
1119
|
project=_PROJECT,
|
1099
1120
|
subproject=_SUBPROJECT,
|
@@ -1119,6 +1140,7 @@ class GammaRegressor(BaseTransformer):
|
|
1119
1140
|
replace=True,
|
1120
1141
|
session=session,
|
1121
1142
|
statement_params=statement_params,
|
1143
|
+
anonymous=True
|
1122
1144
|
)
|
1123
1145
|
def score_wrapper_sproc(
|
1124
1146
|
session: Session,
|
@@ -1126,7 +1148,8 @@ class GammaRegressor(BaseTransformer):
|
|
1126
1148
|
stage_score_file_name: str,
|
1127
1149
|
input_cols: List[str],
|
1128
1150
|
label_cols: List[str],
|
1129
|
-
sample_weight_col: Optional[str]
|
1151
|
+
sample_weight_col: Optional[str],
|
1152
|
+
statement_params: Dict[str, str]
|
1130
1153
|
) -> float:
|
1131
1154
|
import cloudpickle as cp
|
1132
1155
|
import numpy as np
|
@@ -1176,14 +1199,14 @@ class GammaRegressor(BaseTransformer):
|
|
1176
1199
|
api_calls=[Session.call],
|
1177
1200
|
custom_tags=dict([("autogen", True)]),
|
1178
1201
|
)
|
1179
|
-
score =
|
1180
|
-
|
1202
|
+
score = score_wrapper_sproc(
|
1203
|
+
session,
|
1181
1204
|
query,
|
1182
1205
|
stage_score_file_name,
|
1183
1206
|
identifier.get_unescaped_names(self.input_cols),
|
1184
1207
|
identifier.get_unescaped_names(self.label_cols),
|
1185
1208
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1186
|
-
statement_params
|
1209
|
+
statement_params,
|
1187
1210
|
)
|
1188
1211
|
|
1189
1212
|
cleanup_temp_files([local_score_file_name])
|
@@ -1201,18 +1224,20 @@ class GammaRegressor(BaseTransformer):
|
|
1201
1224
|
if self._sklearn_object._estimator_type == 'classifier':
|
1202
1225
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1203
1226
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1204
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1227
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1228
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1205
1229
|
# For regressor, the type of predict is float64
|
1206
1230
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1207
1231
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1208
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1209
|
-
|
1232
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1233
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1210
1234
|
for prob_func in PROB_FUNCTIONS:
|
1211
1235
|
if hasattr(self, prob_func):
|
1212
1236
|
output_cols_prefix: str = f"{prob_func}_"
|
1213
1237
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1214
1238
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1215
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1239
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1240
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1216
1241
|
|
1217
1242
|
@property
|
1218
1243
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|