snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
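The hunks below were extracted from two of the autogenerated estimator modules listed above, snowflake/ml/modeling/linear_model/ridge_cv.py (class RidgeCV) and snowflake/ml/modeling/linear_model/sgd_classifier.py (class SGDClassifier); given the identical +51 -26 counts, the other autogenerated estimators presumably receive the same template change. The most visible edit is the removal of the per-instance self.id in favor of a _get_rand_id() helper that is called separately for every stage, sproc, and table name. A minimal sketch of that pattern, assuming only what the hunks themselves show (the standalone function and the printed names are illustrative, not the package's code):

```python
from uuid import uuid4


def _get_rand_id() -> str:
    # Mirrors the helper added in the diff: a UUID with dashes replaced
    # so the string is safe inside SQL object names.
    return str(uuid4()).replace("-", "_").upper()


# 1.0.2 built every temporary name from a single id fixed at __init__ time;
# 1.0.3 draws a fresh id per name, so repeated fit()/score() calls on the
# same estimator no longer reuse stage or sproc names.
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id())
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=_get_rand_id())
print(transform_stage_name)
print(fit_sproc_name)
```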
snowflake/ml/modeling/linear_model/ridge_cv.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -250,7 +252,6 @@ class RidgeCV(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -276,6 +277,15 @@ class RidgeCV(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -354,7 +364,7 @@ class RidgeCV(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -367,11 +377,12 @@ class RidgeCV(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -397,6 +408,7 @@ class RidgeCV(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -405,7 +417,8 @@ class RidgeCV(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -472,15 +485,15 @@ class RidgeCV(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -490,7 +503,7 @@ class RidgeCV(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -536,7 +549,7 @@ class RidgeCV(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -628,7 +641,7 @@ class RidgeCV(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -795,11 +808,18 @@ class RidgeCV(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="float",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -870,10 +890,10 @@ class RidgeCV(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if the estimator is not a classifier.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1098,7 +1118,7 @@ class RidgeCV(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1112,8 +1132,9 @@ class RidgeCV(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1139,6 +1160,7 @@ class RidgeCV(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1146,7 +1168,8 @@ class RidgeCV(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1196,14 +1219,14 @@ class RidgeCV(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1221,18 +1244,20 @@ class RidgeCV(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
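A second recurring change in the RidgeCV hunks above (and repeated for SGDClassifier below) is building stage paths with posixpath.join instead of os.path.join. A small illustration of the difference, not taken from the package itself; ntpath stands in for what os.path resolves to on Windows, and the stage and file names are made up:

```python
import ntpath      # os.path flavour used on Windows
import posixpath   # always joins with forward slashes

stage_name = "SNOWML_TRANSFORM_ABC123"   # illustrative stage name
file_name = "model.pkl"                  # illustrative file name

# On Windows, os.path.join produces a backslash-separated path, which is
# not a valid Snowflake stage location; posixpath.join is OS-independent.
print(ntpath.join(stage_name, file_name))    # SNOWML_TRANSFORM_ABC123\model.pkl
print(posixpath.join(stage_name, file_name)) # SNOWML_TRANSFORM_ABC123/model.pkl
```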
snowflake/ml/modeling/linear_model/sgd_classifier.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -355,7 +357,6 @@ class SGDClassifier(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -395,6 +396,15 @@ class SGDClassifier(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -473,7 +483,7 @@ class SGDClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -486,11 +496,12 @@ class SGDClassifier(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -516,6 +527,7 @@ class SGDClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -524,7 +536,8 @@ class SGDClassifier(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -591,15 +604,15 @@ class SGDClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -609,7 +622,7 @@ class SGDClassifier(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -655,7 +668,7 @@ class SGDClassifier(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -747,7 +760,7 @@ class SGDClassifier(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -914,11 +927,18 @@ class SGDClassifier(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -989,10 +1009,10 @@ class SGDClassifier(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if the estimator is not a classifier.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1223,7 +1243,7 @@ class SGDClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1237,8 +1257,9 @@ class SGDClassifier(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1264,6 +1285,7 @@ class SGDClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1271,7 +1293,8 @@ class SGDClassifier(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1321,14 +1344,14 @@ class SGDClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1346,18 +1369,20 @@ class SGDClassifier(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
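The predict() hunks in both files also stop hard-coding the expected output column type and instead derive it from the stored model signature via convert_sp_to_sf_type, a Snowpark-internal helper imported at the top of each file. A hedged sketch of that lookup, using only calls that appear in the diff; the FeatureSpec value and the printed type name are assumptions, not taken from the package:

```python
from snowflake.ml.model.model_signature import DataType, FeatureSpec
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

# Stand-in for self.model_signatures['predict'].outputs[0] on a fitted estimator.
predict_output = FeatureSpec(dtype=DataType.DOUBLE, name="OUTPUT_TARGET")

expected_type_inferred = ""  # classifiers start with no hard-coded type
if expected_type_inferred == "":
    # Map the signature's Snowpark type to the Snowflake SQL type name that
    # _batch_inference uses for its output column.
    expected_type_inferred = convert_sp_to_sf_type(predict_output.as_snowpark_type())

print(expected_type_inferred)  # a Snowflake type name such as DOUBLE
```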