snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -274,7 +276,6 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
274
276
|
sample_weight_col: Optional[str] = None,
|
275
277
|
) -> None:
|
276
278
|
super().__init__()
|
277
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
278
279
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
279
280
|
|
280
281
|
self._deps = list(deps)
|
@@ -308,6 +309,15 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
308
309
|
self.set_drop_input_cols(drop_input_cols)
|
309
310
|
self.set_sample_weight_col(sample_weight_col)
|
310
311
|
|
312
|
+
def _get_rand_id(self) -> str:
|
313
|
+
"""
|
314
|
+
Generate random id to be used in sproc and stage names.
|
315
|
+
|
316
|
+
Returns:
|
317
|
+
Random id string usable in sproc, table, and stage names.
|
318
|
+
"""
|
319
|
+
return str(uuid4()).replace("-", "_").upper()
|
320
|
+
|
311
321
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
312
322
|
"""
|
313
323
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -386,7 +396,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
386
396
|
cp.dump(self._sklearn_object, local_transform_file)
|
387
397
|
|
388
398
|
# Create temp stage to run fit.
|
389
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
399
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
390
400
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
391
401
|
SqlResultValidator(
|
392
402
|
session=session,
|
@@ -399,11 +409,12 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
399
409
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
400
410
|
).validate()
|
401
411
|
|
402
|
-
|
412
|
+
# Use posixpath to construct stage paths
|
413
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
414
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
403
415
|
local_result_file_name = get_temp_file_path()
|
404
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
405
416
|
|
406
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
417
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
407
418
|
statement_params = telemetry.get_function_usage_statement_params(
|
408
419
|
project=_PROJECT,
|
409
420
|
subproject=_SUBPROJECT,
|
@@ -429,6 +440,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
429
440
|
replace=True,
|
430
441
|
session=session,
|
431
442
|
statement_params=statement_params,
|
443
|
+
anonymous=True
|
432
444
|
)
|
433
445
|
def fit_wrapper_sproc(
|
434
446
|
session: Session,
|
@@ -437,7 +449,8 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
437
449
|
stage_result_file_name: str,
|
438
450
|
input_cols: List[str],
|
439
451
|
label_cols: List[str],
|
440
|
-
sample_weight_col: Optional[str]
|
452
|
+
sample_weight_col: Optional[str],
|
453
|
+
statement_params: Dict[str, str]
|
441
454
|
) -> str:
|
442
455
|
import cloudpickle as cp
|
443
456
|
import numpy as np
|
@@ -504,15 +517,15 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
504
517
|
api_calls=[Session.call],
|
505
518
|
custom_tags=dict([("autogen", True)]),
|
506
519
|
)
|
507
|
-
sproc_export_file_name =
|
508
|
-
|
520
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
521
|
+
session,
|
509
522
|
query,
|
510
523
|
stage_transform_file_name,
|
511
524
|
stage_result_file_name,
|
512
525
|
identifier.get_unescaped_names(self.input_cols),
|
513
526
|
identifier.get_unescaped_names(self.label_cols),
|
514
527
|
identifier.get_unescaped_names(self.sample_weight_col),
|
515
|
-
statement_params
|
528
|
+
statement_params,
|
516
529
|
)
|
517
530
|
|
518
531
|
if "|" in sproc_export_file_name:
|
@@ -522,7 +535,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
522
535
|
print("\n".join(fields[1:]))
|
523
536
|
|
524
537
|
session.file.get(
|
525
|
-
|
538
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
526
539
|
local_result_file_name,
|
527
540
|
statement_params=statement_params
|
528
541
|
)
|
@@ -568,7 +581,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
568
581
|
|
569
582
|
# Register vectorized UDF for batch inference
|
570
583
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
571
|
-
safe_id=self.
|
584
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
572
585
|
|
573
586
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
574
587
|
# will try to pickle all of self which fails.
|
@@ -660,7 +673,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
660
673
|
return transformed_pandas_df.to_dict("records")
|
661
674
|
|
662
675
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
663
|
-
safe_id=self.
|
676
|
+
safe_id=self._get_rand_id()
|
664
677
|
)
|
665
678
|
|
666
679
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -827,11 +840,18 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
827
840
|
Transformed dataset.
|
828
841
|
"""
|
829
842
|
if isinstance(dataset, DataFrame):
|
843
|
+
expected_type_inferred = ""
|
844
|
+
# when it is classifier, infer the datatype from label columns
|
845
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
846
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
847
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
848
|
+
)
|
849
|
+
|
830
850
|
output_df = self._batch_inference(
|
831
851
|
dataset=dataset,
|
832
852
|
inference_method="predict",
|
833
853
|
expected_output_cols_list=self.output_cols,
|
834
|
-
expected_output_cols_type=
|
854
|
+
expected_output_cols_type=expected_type_inferred,
|
835
855
|
)
|
836
856
|
elif isinstance(dataset, pd.DataFrame):
|
837
857
|
output_df = self._sklearn_inference(
|
@@ -902,10 +922,10 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
902
922
|
|
903
923
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
904
924
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
905
|
-
Returns
|
925
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
906
926
|
"""
|
907
927
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
908
|
-
return []
|
928
|
+
return [output_cols_prefix]
|
909
929
|
|
910
930
|
classes = self._sklearn_object.classes_
|
911
931
|
if isinstance(classes, numpy.ndarray):
|
@@ -1132,7 +1152,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
1132
1152
|
cp.dump(self._sklearn_object, local_score_file)
|
1133
1153
|
|
1134
1154
|
# Create temp stage to run score.
|
1135
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1155
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1136
1156
|
session = dataset._session
|
1137
1157
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1138
1158
|
SqlResultValidator(
|
@@ -1146,8 +1166,9 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
1146
1166
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1147
1167
|
).validate()
|
1148
1168
|
|
1149
|
-
|
1150
|
-
|
1169
|
+
# Use posixpath to construct stage paths
|
1170
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1171
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1151
1172
|
statement_params = telemetry.get_function_usage_statement_params(
|
1152
1173
|
project=_PROJECT,
|
1153
1174
|
subproject=_SUBPROJECT,
|
@@ -1173,6 +1194,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
1173
1194
|
replace=True,
|
1174
1195
|
session=session,
|
1175
1196
|
statement_params=statement_params,
|
1197
|
+
anonymous=True
|
1176
1198
|
)
|
1177
1199
|
def score_wrapper_sproc(
|
1178
1200
|
session: Session,
|
@@ -1180,7 +1202,8 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
1180
1202
|
stage_score_file_name: str,
|
1181
1203
|
input_cols: List[str],
|
1182
1204
|
label_cols: List[str],
|
1183
|
-
sample_weight_col: Optional[str]
|
1205
|
+
sample_weight_col: Optional[str],
|
1206
|
+
statement_params: Dict[str, str]
|
1184
1207
|
) -> float:
|
1185
1208
|
import cloudpickle as cp
|
1186
1209
|
import numpy as np
|
@@ -1230,14 +1253,14 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
1230
1253
|
api_calls=[Session.call],
|
1231
1254
|
custom_tags=dict([("autogen", True)]),
|
1232
1255
|
)
|
1233
|
-
score =
|
1234
|
-
|
1256
|
+
score = score_wrapper_sproc(
|
1257
|
+
session,
|
1235
1258
|
query,
|
1236
1259
|
stage_score_file_name,
|
1237
1260
|
identifier.get_unescaped_names(self.input_cols),
|
1238
1261
|
identifier.get_unescaped_names(self.label_cols),
|
1239
1262
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1240
|
-
statement_params
|
1263
|
+
statement_params,
|
1241
1264
|
)
|
1242
1265
|
|
1243
1266
|
cleanup_temp_files([local_score_file_name])
|
@@ -1255,18 +1278,20 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
1255
1278
|
if self._sklearn_object._estimator_type == 'classifier':
|
1256
1279
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1257
1280
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1258
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1281
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1282
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1259
1283
|
# For regressor, the type of predict is float64
|
1260
1284
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1261
1285
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1262
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1263
|
-
|
1286
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1287
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1264
1288
|
for prob_func in PROB_FUNCTIONS:
|
1265
1289
|
if hasattr(self, prob_func):
|
1266
1290
|
output_cols_prefix: str = f"{prob_func}_"
|
1267
1291
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1268
1292
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1269
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1293
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1294
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1270
1295
|
|
1271
1296
|
@property
|
1272
1297
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -262,7 +264,6 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
262
264
|
sample_weight_col: Optional[str] = None,
|
263
265
|
) -> None:
|
264
266
|
super().__init__()
|
265
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
266
267
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
267
268
|
|
268
269
|
self._deps = list(deps)
|
@@ -295,6 +296,15 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
295
296
|
self.set_drop_input_cols(drop_input_cols)
|
296
297
|
self.set_sample_weight_col(sample_weight_col)
|
297
298
|
|
299
|
+
def _get_rand_id(self) -> str:
|
300
|
+
"""
|
301
|
+
Generate random id to be used in sproc and stage names.
|
302
|
+
|
303
|
+
Returns:
|
304
|
+
Random id string usable in sproc, table, and stage names.
|
305
|
+
"""
|
306
|
+
return str(uuid4()).replace("-", "_").upper()
|
307
|
+
|
298
308
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
299
309
|
"""
|
300
310
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -373,7 +383,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
373
383
|
cp.dump(self._sklearn_object, local_transform_file)
|
374
384
|
|
375
385
|
# Create temp stage to run fit.
|
376
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
386
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
377
387
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
378
388
|
SqlResultValidator(
|
379
389
|
session=session,
|
@@ -386,11 +396,12 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
386
396
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
387
397
|
).validate()
|
388
398
|
|
389
|
-
|
399
|
+
# Use posixpath to construct stage paths
|
400
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
401
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
390
402
|
local_result_file_name = get_temp_file_path()
|
391
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
392
403
|
|
393
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
404
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
394
405
|
statement_params = telemetry.get_function_usage_statement_params(
|
395
406
|
project=_PROJECT,
|
396
407
|
subproject=_SUBPROJECT,
|
@@ -416,6 +427,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
416
427
|
replace=True,
|
417
428
|
session=session,
|
418
429
|
statement_params=statement_params,
|
430
|
+
anonymous=True
|
419
431
|
)
|
420
432
|
def fit_wrapper_sproc(
|
421
433
|
session: Session,
|
@@ -424,7 +436,8 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
424
436
|
stage_result_file_name: str,
|
425
437
|
input_cols: List[str],
|
426
438
|
label_cols: List[str],
|
427
|
-
sample_weight_col: Optional[str]
|
439
|
+
sample_weight_col: Optional[str],
|
440
|
+
statement_params: Dict[str, str]
|
428
441
|
) -> str:
|
429
442
|
import cloudpickle as cp
|
430
443
|
import numpy as np
|
@@ -491,15 +504,15 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
491
504
|
api_calls=[Session.call],
|
492
505
|
custom_tags=dict([("autogen", True)]),
|
493
506
|
)
|
494
|
-
sproc_export_file_name =
|
495
|
-
|
507
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
508
|
+
session,
|
496
509
|
query,
|
497
510
|
stage_transform_file_name,
|
498
511
|
stage_result_file_name,
|
499
512
|
identifier.get_unescaped_names(self.input_cols),
|
500
513
|
identifier.get_unescaped_names(self.label_cols),
|
501
514
|
identifier.get_unescaped_names(self.sample_weight_col),
|
502
|
-
statement_params
|
515
|
+
statement_params,
|
503
516
|
)
|
504
517
|
|
505
518
|
if "|" in sproc_export_file_name:
|
@@ -509,7 +522,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
509
522
|
print("\n".join(fields[1:]))
|
510
523
|
|
511
524
|
session.file.get(
|
512
|
-
|
525
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
513
526
|
local_result_file_name,
|
514
527
|
statement_params=statement_params
|
515
528
|
)
|
@@ -555,7 +568,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
555
568
|
|
556
569
|
# Register vectorized UDF for batch inference
|
557
570
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
558
|
-
safe_id=self.
|
571
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
559
572
|
|
560
573
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
561
574
|
# will try to pickle all of self which fails.
|
@@ -647,7 +660,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
647
660
|
return transformed_pandas_df.to_dict("records")
|
648
661
|
|
649
662
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
650
|
-
safe_id=self.
|
663
|
+
safe_id=self._get_rand_id()
|
651
664
|
)
|
652
665
|
|
653
666
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -814,11 +827,18 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
814
827
|
Transformed dataset.
|
815
828
|
"""
|
816
829
|
if isinstance(dataset, DataFrame):
|
830
|
+
expected_type_inferred = "float"
|
831
|
+
# when it is classifier, infer the datatype from label columns
|
832
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
833
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
834
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
835
|
+
)
|
836
|
+
|
817
837
|
output_df = self._batch_inference(
|
818
838
|
dataset=dataset,
|
819
839
|
inference_method="predict",
|
820
840
|
expected_output_cols_list=self.output_cols,
|
821
|
-
expected_output_cols_type=
|
841
|
+
expected_output_cols_type=expected_type_inferred,
|
822
842
|
)
|
823
843
|
elif isinstance(dataset, pd.DataFrame):
|
824
844
|
output_df = self._sklearn_inference(
|
@@ -889,10 +909,10 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
889
909
|
|
890
910
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
891
911
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
892
|
-
Returns
|
912
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
893
913
|
"""
|
894
914
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
895
|
-
return []
|
915
|
+
return [output_cols_prefix]
|
896
916
|
|
897
917
|
classes = self._sklearn_object.classes_
|
898
918
|
if isinstance(classes, numpy.ndarray):
|
@@ -1117,7 +1137,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
1117
1137
|
cp.dump(self._sklearn_object, local_score_file)
|
1118
1138
|
|
1119
1139
|
# Create temp stage to run score.
|
1120
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1140
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1121
1141
|
session = dataset._session
|
1122
1142
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1123
1143
|
SqlResultValidator(
|
@@ -1131,8 +1151,9 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
1131
1151
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1132
1152
|
).validate()
|
1133
1153
|
|
1134
|
-
|
1135
|
-
|
1154
|
+
# Use posixpath to construct stage paths
|
1155
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1156
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1136
1157
|
statement_params = telemetry.get_function_usage_statement_params(
|
1137
1158
|
project=_PROJECT,
|
1138
1159
|
subproject=_SUBPROJECT,
|
@@ -1158,6 +1179,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
1158
1179
|
replace=True,
|
1159
1180
|
session=session,
|
1160
1181
|
statement_params=statement_params,
|
1182
|
+
anonymous=True
|
1161
1183
|
)
|
1162
1184
|
def score_wrapper_sproc(
|
1163
1185
|
session: Session,
|
@@ -1165,7 +1187,8 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
1165
1187
|
stage_score_file_name: str,
|
1166
1188
|
input_cols: List[str],
|
1167
1189
|
label_cols: List[str],
|
1168
|
-
sample_weight_col: Optional[str]
|
1190
|
+
sample_weight_col: Optional[str],
|
1191
|
+
statement_params: Dict[str, str]
|
1169
1192
|
) -> float:
|
1170
1193
|
import cloudpickle as cp
|
1171
1194
|
import numpy as np
|
@@ -1215,14 +1238,14 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
1215
1238
|
api_calls=[Session.call],
|
1216
1239
|
custom_tags=dict([("autogen", True)]),
|
1217
1240
|
)
|
1218
|
-
score =
|
1219
|
-
|
1241
|
+
score = score_wrapper_sproc(
|
1242
|
+
session,
|
1220
1243
|
query,
|
1221
1244
|
stage_score_file_name,
|
1222
1245
|
identifier.get_unescaped_names(self.input_cols),
|
1223
1246
|
identifier.get_unescaped_names(self.label_cols),
|
1224
1247
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1225
|
-
statement_params
|
1248
|
+
statement_params,
|
1226
1249
|
)
|
1227
1250
|
|
1228
1251
|
cleanup_temp_files([local_score_file_name])
|
@@ -1240,18 +1263,20 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
1240
1263
|
if self._sklearn_object._estimator_type == 'classifier':
|
1241
1264
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1242
1265
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1243
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1266
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1267
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1244
1268
|
# For regressor, the type of predict is float64
|
1245
1269
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1246
1270
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1247
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1248
|
-
|
1271
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1272
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1249
1273
|
for prob_func in PROB_FUNCTIONS:
|
1250
1274
|
if hasattr(self, prob_func):
|
1251
1275
|
output_cols_prefix: str = f"{prob_func}_"
|
1252
1276
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1253
1277
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1254
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1278
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1279
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1255
1280
|
|
1256
1281
|
@property
|
1257
1282
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|