snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff compares two publicly released versions of the package as published to their public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/linear_model/ransac_regressor.py (RANSACRegressor)

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -285,7 +287,6 @@ class RANSACRegressor(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | _gather_dependencies(estimator)
         deps = deps | _gather_dependencies(base_estimator)
@@ -319,6 +320,15 @@ class RANSACRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -397,7 +407,7 @@ class RANSACRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -410,11 +420,12 @@ class RANSACRegressor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -440,6 +451,7 @@ class RANSACRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -448,7 +460,8 @@ class RANSACRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -515,15 +528,15 @@ class RANSACRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -533,7 +546,7 @@ class RANSACRegressor(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -579,7 +592,7 @@ class RANSACRegressor(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -671,7 +684,7 @@ class RANSACRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -838,11 +851,18 @@ class RANSACRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -913,10 +933,10 @@ class RANSACRegressor(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1141,7 +1161,7 @@ class RANSACRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1155,8 +1175,9 @@ class RANSACRegressor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1182,6 +1203,7 @@ class RANSACRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
        )
        def score_wrapper_sproc(
            session: Session,
@@ -1189,7 +1211,8 @@ class RANSACRegressor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1239,14 +1262,14 @@ class RANSACRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1264,18 +1287,20 @@ class RANSACRegressor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
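A recurring change in the RANSACRegressor diff above (and repeated in the Ridge diff below) is the switch from `os.path.join` to `posixpath.join` when building stage file paths. The short sketch below is not part of the package; it only illustrates why the change matters. Snowflake stage paths always use forward slashes, while `os.path.join` would emit backslashes when the client runs on Windows. `ntpath` is used as a stand-in for `os.path` on Windows so the snippet behaves the same on any OS, and the stage and file names are hypothetical.

```python
import ntpath      # behaves like os.path does on Windows, regardless of the current OS
import posixpath

stage_name = "SNOWML_TRANSFORM_ABC123"   # hypothetical temporary stage name
file_name = "model.pkl.gz"               # hypothetical local file basename

# On a Windows client, os.path.join inserts a backslash, which is not a valid
# separator inside a Snowflake stage path.
print(ntpath.join(stage_name, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl.gz
# posixpath.join always uses "/", so the stage path is correct on every platform.
print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl.gz
```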
snowflake/ml/modeling/linear_model/ridge.py (Ridge)

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -262,7 +264,6 @@ class Ridge(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -289,6 +290,15 @@ class Ridge(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -367,7 +377,7 @@ class Ridge(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -380,11 +390,12 @@ class Ridge(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -410,6 +421,7 @@ class Ridge(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -418,7 +430,8 @@ class Ridge(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -485,15 +498,15 @@ class Ridge(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -503,7 +516,7 @@ class Ridge(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -549,7 +562,7 @@ class Ridge(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -641,7 +654,7 @@ class Ridge(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
        )
 
        pass_through_columns = self._get_pass_through_columns(dataset)
@@ -808,11 +821,18 @@ class Ridge(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -883,10 +903,10 @@ class Ridge(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1111,7 +1131,7 @@ class Ridge(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1125,8 +1145,9 @@ class Ridge(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1152,6 +1173,7 @@ class Ridge(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1159,7 +1181,8 @@ class Ridge(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1209,14 +1232,14 @@ class Ridge(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1234,18 +1257,20 @@ class Ridge(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: