snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -264,7 +266,6 @@ class SGDOneClassSVM(BaseTransformer):
|
|
264
266
|
sample_weight_col: Optional[str] = None,
|
265
267
|
) -> None:
|
266
268
|
super().__init__()
|
267
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
268
269
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
269
270
|
|
270
271
|
self._deps = list(deps)
|
@@ -295,6 +296,15 @@ class SGDOneClassSVM(BaseTransformer):
|
|
295
296
|
self.set_drop_input_cols(drop_input_cols)
|
296
297
|
self.set_sample_weight_col(sample_weight_col)
|
297
298
|
|
299
|
+
def _get_rand_id(self) -> str:
|
300
|
+
"""
|
301
|
+
Generate random id to be used in sproc and stage names.
|
302
|
+
|
303
|
+
Returns:
|
304
|
+
Random id string usable in sproc, table, and stage names.
|
305
|
+
"""
|
306
|
+
return str(uuid4()).replace("-", "_").upper()
|
307
|
+
|
298
308
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
299
309
|
"""
|
300
310
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -373,7 +383,7 @@ class SGDOneClassSVM(BaseTransformer):
|
|
373
383
|
cp.dump(self._sklearn_object, local_transform_file)
|
374
384
|
|
375
385
|
# Create temp stage to run fit.
|
376
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
386
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
377
387
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
378
388
|
SqlResultValidator(
|
379
389
|
session=session,
|
@@ -386,11 +396,12 @@ class SGDOneClassSVM(BaseTransformer):
|
|
386
396
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
387
397
|
).validate()
|
388
398
|
|
389
|
-
|
399
|
+
# Use posixpath to construct stage paths
|
400
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
401
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
390
402
|
local_result_file_name = get_temp_file_path()
|
391
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
392
403
|
|
393
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
404
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
394
405
|
statement_params = telemetry.get_function_usage_statement_params(
|
395
406
|
project=_PROJECT,
|
396
407
|
subproject=_SUBPROJECT,
|
@@ -416,6 +427,7 @@ class SGDOneClassSVM(BaseTransformer):
|
|
416
427
|
replace=True,
|
417
428
|
session=session,
|
418
429
|
statement_params=statement_params,
|
430
|
+
anonymous=True
|
419
431
|
)
|
420
432
|
def fit_wrapper_sproc(
|
421
433
|
session: Session,
|
@@ -424,7 +436,8 @@ class SGDOneClassSVM(BaseTransformer):
|
|
424
436
|
stage_result_file_name: str,
|
425
437
|
input_cols: List[str],
|
426
438
|
label_cols: List[str],
|
427
|
-
sample_weight_col: Optional[str]
|
439
|
+
sample_weight_col: Optional[str],
|
440
|
+
statement_params: Dict[str, str]
|
428
441
|
) -> str:
|
429
442
|
import cloudpickle as cp
|
430
443
|
import numpy as np
|
@@ -491,15 +504,15 @@ class SGDOneClassSVM(BaseTransformer):
|
|
491
504
|
api_calls=[Session.call],
|
492
505
|
custom_tags=dict([("autogen", True)]),
|
493
506
|
)
|
494
|
-
sproc_export_file_name =
|
495
|
-
|
507
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
508
|
+
session,
|
496
509
|
query,
|
497
510
|
stage_transform_file_name,
|
498
511
|
stage_result_file_name,
|
499
512
|
identifier.get_unescaped_names(self.input_cols),
|
500
513
|
identifier.get_unescaped_names(self.label_cols),
|
501
514
|
identifier.get_unescaped_names(self.sample_weight_col),
|
502
|
-
statement_params
|
515
|
+
statement_params,
|
503
516
|
)
|
504
517
|
|
505
518
|
if "|" in sproc_export_file_name:
|
@@ -509,7 +522,7 @@ class SGDOneClassSVM(BaseTransformer):
|
|
509
522
|
print("\n".join(fields[1:]))
|
510
523
|
|
511
524
|
session.file.get(
|
512
|
-
|
525
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
513
526
|
local_result_file_name,
|
514
527
|
statement_params=statement_params
|
515
528
|
)
|
@@ -555,7 +568,7 @@ class SGDOneClassSVM(BaseTransformer):
|
|
555
568
|
|
556
569
|
# Register vectorized UDF for batch inference
|
557
570
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
558
|
-
safe_id=self.
|
571
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
559
572
|
|
560
573
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
561
574
|
# will try to pickle all of self which fails.
|
@@ -647,7 +660,7 @@ class SGDOneClassSVM(BaseTransformer):
|
|
647
660
|
return transformed_pandas_df.to_dict("records")
|
648
661
|
|
649
662
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
650
|
-
safe_id=self.
|
663
|
+
safe_id=self._get_rand_id()
|
651
664
|
)
|
652
665
|
|
653
666
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -814,11 +827,18 @@ class SGDOneClassSVM(BaseTransformer):
|
|
814
827
|
Transformed dataset.
|
815
828
|
"""
|
816
829
|
if isinstance(dataset, DataFrame):
|
830
|
+
expected_type_inferred = ""
|
831
|
+
# when it is classifier, infer the datatype from label columns
|
832
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
833
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
834
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
835
|
+
)
|
836
|
+
|
817
837
|
output_df = self._batch_inference(
|
818
838
|
dataset=dataset,
|
819
839
|
inference_method="predict",
|
820
840
|
expected_output_cols_list=self.output_cols,
|
821
|
-
expected_output_cols_type=
|
841
|
+
expected_output_cols_type=expected_type_inferred,
|
822
842
|
)
|
823
843
|
elif isinstance(dataset, pd.DataFrame):
|
824
844
|
output_df = self._sklearn_inference(
|
@@ -889,10 +909,10 @@ class SGDOneClassSVM(BaseTransformer):
|
|
889
909
|
|
890
910
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
891
911
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
892
|
-
Returns
|
912
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
893
913
|
"""
|
894
914
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
895
|
-
return []
|
915
|
+
return [output_cols_prefix]
|
896
916
|
|
897
917
|
classes = self._sklearn_object.classes_
|
898
918
|
if isinstance(classes, numpy.ndarray):
|
@@ -1119,7 +1139,7 @@ class SGDOneClassSVM(BaseTransformer):
|
|
1119
1139
|
cp.dump(self._sklearn_object, local_score_file)
|
1120
1140
|
|
1121
1141
|
# Create temp stage to run score.
|
1122
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1142
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1123
1143
|
session = dataset._session
|
1124
1144
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1125
1145
|
SqlResultValidator(
|
@@ -1133,8 +1153,9 @@ class SGDOneClassSVM(BaseTransformer):
|
|
1133
1153
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1134
1154
|
).validate()
|
1135
1155
|
|
1136
|
-
|
1137
|
-
|
1156
|
+
# Use posixpath to construct stage paths
|
1157
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1158
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1138
1159
|
statement_params = telemetry.get_function_usage_statement_params(
|
1139
1160
|
project=_PROJECT,
|
1140
1161
|
subproject=_SUBPROJECT,
|
@@ -1160,6 +1181,7 @@ class SGDOneClassSVM(BaseTransformer):
|
|
1160
1181
|
replace=True,
|
1161
1182
|
session=session,
|
1162
1183
|
statement_params=statement_params,
|
1184
|
+
anonymous=True
|
1163
1185
|
)
|
1164
1186
|
def score_wrapper_sproc(
|
1165
1187
|
session: Session,
|
@@ -1167,7 +1189,8 @@ class SGDOneClassSVM(BaseTransformer):
|
|
1167
1189
|
stage_score_file_name: str,
|
1168
1190
|
input_cols: List[str],
|
1169
1191
|
label_cols: List[str],
|
1170
|
-
sample_weight_col: Optional[str]
|
1192
|
+
sample_weight_col: Optional[str],
|
1193
|
+
statement_params: Dict[str, str]
|
1171
1194
|
) -> float:
|
1172
1195
|
import cloudpickle as cp
|
1173
1196
|
import numpy as np
|
@@ -1217,14 +1240,14 @@ class SGDOneClassSVM(BaseTransformer):
|
|
1217
1240
|
api_calls=[Session.call],
|
1218
1241
|
custom_tags=dict([("autogen", True)]),
|
1219
1242
|
)
|
1220
|
-
score =
|
1221
|
-
|
1243
|
+
score = score_wrapper_sproc(
|
1244
|
+
session,
|
1222
1245
|
query,
|
1223
1246
|
stage_score_file_name,
|
1224
1247
|
identifier.get_unescaped_names(self.input_cols),
|
1225
1248
|
identifier.get_unescaped_names(self.label_cols),
|
1226
1249
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1227
|
-
statement_params
|
1250
|
+
statement_params,
|
1228
1251
|
)
|
1229
1252
|
|
1230
1253
|
cleanup_temp_files([local_score_file_name])
|
@@ -1242,18 +1265,20 @@ class SGDOneClassSVM(BaseTransformer):
|
|
1242
1265
|
if self._sklearn_object._estimator_type == 'classifier':
|
1243
1266
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1244
1267
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1245
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1268
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1269
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1246
1270
|
# For regressor, the type of predict is float64
|
1247
1271
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1248
1272
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1249
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1250
|
-
|
1273
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1274
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1251
1275
|
for prob_func in PROB_FUNCTIONS:
|
1252
1276
|
if hasattr(self, prob_func):
|
1253
1277
|
output_cols_prefix: str = f"{prob_func}_"
|
1254
1278
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1255
1279
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1256
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1280
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1281
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1257
1282
|
|
1258
1283
|
@property
|
1259
1284
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -323,7 +325,6 @@ class SGDRegressor(BaseTransformer):
|
|
323
325
|
sample_weight_col: Optional[str] = None,
|
324
326
|
) -> None:
|
325
327
|
super().__init__()
|
326
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
327
328
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
328
329
|
|
329
330
|
self._deps = list(deps)
|
@@ -361,6 +362,15 @@ class SGDRegressor(BaseTransformer):
|
|
361
362
|
self.set_drop_input_cols(drop_input_cols)
|
362
363
|
self.set_sample_weight_col(sample_weight_col)
|
363
364
|
|
365
|
+
def _get_rand_id(self) -> str:
|
366
|
+
"""
|
367
|
+
Generate random id to be used in sproc and stage names.
|
368
|
+
|
369
|
+
Returns:
|
370
|
+
Random id string usable in sproc, table, and stage names.
|
371
|
+
"""
|
372
|
+
return str(uuid4()).replace("-", "_").upper()
|
373
|
+
|
364
374
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
365
375
|
"""
|
366
376
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -439,7 +449,7 @@ class SGDRegressor(BaseTransformer):
|
|
439
449
|
cp.dump(self._sklearn_object, local_transform_file)
|
440
450
|
|
441
451
|
# Create temp stage to run fit.
|
442
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
452
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
443
453
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
444
454
|
SqlResultValidator(
|
445
455
|
session=session,
|
@@ -452,11 +462,12 @@ class SGDRegressor(BaseTransformer):
|
|
452
462
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
453
463
|
).validate()
|
454
464
|
|
455
|
-
|
465
|
+
# Use posixpath to construct stage paths
|
466
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
467
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
456
468
|
local_result_file_name = get_temp_file_path()
|
457
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
458
469
|
|
459
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
470
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
460
471
|
statement_params = telemetry.get_function_usage_statement_params(
|
461
472
|
project=_PROJECT,
|
462
473
|
subproject=_SUBPROJECT,
|
@@ -482,6 +493,7 @@ class SGDRegressor(BaseTransformer):
|
|
482
493
|
replace=True,
|
483
494
|
session=session,
|
484
495
|
statement_params=statement_params,
|
496
|
+
anonymous=True
|
485
497
|
)
|
486
498
|
def fit_wrapper_sproc(
|
487
499
|
session: Session,
|
@@ -490,7 +502,8 @@ class SGDRegressor(BaseTransformer):
|
|
490
502
|
stage_result_file_name: str,
|
491
503
|
input_cols: List[str],
|
492
504
|
label_cols: List[str],
|
493
|
-
sample_weight_col: Optional[str]
|
505
|
+
sample_weight_col: Optional[str],
|
506
|
+
statement_params: Dict[str, str]
|
494
507
|
) -> str:
|
495
508
|
import cloudpickle as cp
|
496
509
|
import numpy as np
|
@@ -557,15 +570,15 @@ class SGDRegressor(BaseTransformer):
|
|
557
570
|
api_calls=[Session.call],
|
558
571
|
custom_tags=dict([("autogen", True)]),
|
559
572
|
)
|
560
|
-
sproc_export_file_name =
|
561
|
-
|
573
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
574
|
+
session,
|
562
575
|
query,
|
563
576
|
stage_transform_file_name,
|
564
577
|
stage_result_file_name,
|
565
578
|
identifier.get_unescaped_names(self.input_cols),
|
566
579
|
identifier.get_unescaped_names(self.label_cols),
|
567
580
|
identifier.get_unescaped_names(self.sample_weight_col),
|
568
|
-
statement_params
|
581
|
+
statement_params,
|
569
582
|
)
|
570
583
|
|
571
584
|
if "|" in sproc_export_file_name:
|
@@ -575,7 +588,7 @@ class SGDRegressor(BaseTransformer):
|
|
575
588
|
print("\n".join(fields[1:]))
|
576
589
|
|
577
590
|
session.file.get(
|
578
|
-
|
591
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
579
592
|
local_result_file_name,
|
580
593
|
statement_params=statement_params
|
581
594
|
)
|
@@ -621,7 +634,7 @@ class SGDRegressor(BaseTransformer):
|
|
621
634
|
|
622
635
|
# Register vectorized UDF for batch inference
|
623
636
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
624
|
-
safe_id=self.
|
637
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
625
638
|
|
626
639
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
627
640
|
# will try to pickle all of self which fails.
|
@@ -713,7 +726,7 @@ class SGDRegressor(BaseTransformer):
|
|
713
726
|
return transformed_pandas_df.to_dict("records")
|
714
727
|
|
715
728
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
716
|
-
safe_id=self.
|
729
|
+
safe_id=self._get_rand_id()
|
717
730
|
)
|
718
731
|
|
719
732
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -880,11 +893,18 @@ class SGDRegressor(BaseTransformer):
|
|
880
893
|
Transformed dataset.
|
881
894
|
"""
|
882
895
|
if isinstance(dataset, DataFrame):
|
896
|
+
expected_type_inferred = "float"
|
897
|
+
# when it is classifier, infer the datatype from label columns
|
898
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
899
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
900
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
901
|
+
)
|
902
|
+
|
883
903
|
output_df = self._batch_inference(
|
884
904
|
dataset=dataset,
|
885
905
|
inference_method="predict",
|
886
906
|
expected_output_cols_list=self.output_cols,
|
887
|
-
expected_output_cols_type=
|
907
|
+
expected_output_cols_type=expected_type_inferred,
|
888
908
|
)
|
889
909
|
elif isinstance(dataset, pd.DataFrame):
|
890
910
|
output_df = self._sklearn_inference(
|
@@ -955,10 +975,10 @@ class SGDRegressor(BaseTransformer):
|
|
955
975
|
|
956
976
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
957
977
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
958
|
-
Returns
|
978
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
959
979
|
"""
|
960
980
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
961
|
-
return []
|
981
|
+
return [output_cols_prefix]
|
962
982
|
|
963
983
|
classes = self._sklearn_object.classes_
|
964
984
|
if isinstance(classes, numpy.ndarray):
|
@@ -1183,7 +1203,7 @@ class SGDRegressor(BaseTransformer):
|
|
1183
1203
|
cp.dump(self._sklearn_object, local_score_file)
|
1184
1204
|
|
1185
1205
|
# Create temp stage to run score.
|
1186
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1206
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1187
1207
|
session = dataset._session
|
1188
1208
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1189
1209
|
SqlResultValidator(
|
@@ -1197,8 +1217,9 @@ class SGDRegressor(BaseTransformer):
|
|
1197
1217
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1198
1218
|
).validate()
|
1199
1219
|
|
1200
|
-
|
1201
|
-
|
1220
|
+
# Use posixpath to construct stage paths
|
1221
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1222
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1202
1223
|
statement_params = telemetry.get_function_usage_statement_params(
|
1203
1224
|
project=_PROJECT,
|
1204
1225
|
subproject=_SUBPROJECT,
|
@@ -1224,6 +1245,7 @@ class SGDRegressor(BaseTransformer):
|
|
1224
1245
|
replace=True,
|
1225
1246
|
session=session,
|
1226
1247
|
statement_params=statement_params,
|
1248
|
+
anonymous=True
|
1227
1249
|
)
|
1228
1250
|
def score_wrapper_sproc(
|
1229
1251
|
session: Session,
|
@@ -1231,7 +1253,8 @@ class SGDRegressor(BaseTransformer):
|
|
1231
1253
|
stage_score_file_name: str,
|
1232
1254
|
input_cols: List[str],
|
1233
1255
|
label_cols: List[str],
|
1234
|
-
sample_weight_col: Optional[str]
|
1256
|
+
sample_weight_col: Optional[str],
|
1257
|
+
statement_params: Dict[str, str]
|
1235
1258
|
) -> float:
|
1236
1259
|
import cloudpickle as cp
|
1237
1260
|
import numpy as np
|
@@ -1281,14 +1304,14 @@ class SGDRegressor(BaseTransformer):
|
|
1281
1304
|
api_calls=[Session.call],
|
1282
1305
|
custom_tags=dict([("autogen", True)]),
|
1283
1306
|
)
|
1284
|
-
score =
|
1285
|
-
|
1307
|
+
score = score_wrapper_sproc(
|
1308
|
+
session,
|
1286
1309
|
query,
|
1287
1310
|
stage_score_file_name,
|
1288
1311
|
identifier.get_unescaped_names(self.input_cols),
|
1289
1312
|
identifier.get_unescaped_names(self.label_cols),
|
1290
1313
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1291
|
-
statement_params
|
1314
|
+
statement_params,
|
1292
1315
|
)
|
1293
1316
|
|
1294
1317
|
cleanup_temp_files([local_score_file_name])
|
@@ -1306,18 +1329,20 @@ class SGDRegressor(BaseTransformer):
|
|
1306
1329
|
if self._sklearn_object._estimator_type == 'classifier':
|
1307
1330
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1308
1331
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1309
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1332
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1333
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1310
1334
|
# For regressor, the type of predict is float64
|
1311
1335
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1312
1336
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1313
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1314
|
-
|
1337
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1338
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1315
1339
|
for prob_func in PROB_FUNCTIONS:
|
1316
1340
|
if hasattr(self, prob_func):
|
1317
1341
|
output_cols_prefix: str = f"{prob_func}_"
|
1318
1342
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1319
1343
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1320
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1344
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1345
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1321
1346
|
|
1322
1347
|
@property
|
1323
1348
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|