snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -244,7 +246,6 @@ class IsolationForest(BaseTransformer):
|
|
244
246
|
sample_weight_col: Optional[str] = None,
|
245
247
|
) -> None:
|
246
248
|
super().__init__()
|
247
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
248
249
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
249
250
|
|
250
251
|
self._deps = list(deps)
|
@@ -272,6 +273,15 @@ class IsolationForest(BaseTransformer):
|
|
272
273
|
self.set_drop_input_cols(drop_input_cols)
|
273
274
|
self.set_sample_weight_col(sample_weight_col)
|
274
275
|
|
276
|
+
def _get_rand_id(self) -> str:
|
277
|
+
"""
|
278
|
+
Generate random id to be used in sproc and stage names.
|
279
|
+
|
280
|
+
Returns:
|
281
|
+
Random id string usable in sproc, table, and stage names.
|
282
|
+
"""
|
283
|
+
return str(uuid4()).replace("-", "_").upper()
|
284
|
+
|
275
285
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
276
286
|
"""
|
277
287
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -350,7 +360,7 @@ class IsolationForest(BaseTransformer):
|
|
350
360
|
cp.dump(self._sklearn_object, local_transform_file)
|
351
361
|
|
352
362
|
# Create temp stage to run fit.
|
353
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
363
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
354
364
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
355
365
|
SqlResultValidator(
|
356
366
|
session=session,
|
@@ -363,11 +373,12 @@ class IsolationForest(BaseTransformer):
|
|
363
373
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
364
374
|
).validate()
|
365
375
|
|
366
|
-
|
376
|
+
# Use posixpath to construct stage paths
|
377
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
378
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
367
379
|
local_result_file_name = get_temp_file_path()
|
368
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
369
380
|
|
370
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
381
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
371
382
|
statement_params = telemetry.get_function_usage_statement_params(
|
372
383
|
project=_PROJECT,
|
373
384
|
subproject=_SUBPROJECT,
|
@@ -393,6 +404,7 @@ class IsolationForest(BaseTransformer):
|
|
393
404
|
replace=True,
|
394
405
|
session=session,
|
395
406
|
statement_params=statement_params,
|
407
|
+
anonymous=True
|
396
408
|
)
|
397
409
|
def fit_wrapper_sproc(
|
398
410
|
session: Session,
|
@@ -401,7 +413,8 @@ class IsolationForest(BaseTransformer):
|
|
401
413
|
stage_result_file_name: str,
|
402
414
|
input_cols: List[str],
|
403
415
|
label_cols: List[str],
|
404
|
-
sample_weight_col: Optional[str]
|
416
|
+
sample_weight_col: Optional[str],
|
417
|
+
statement_params: Dict[str, str]
|
405
418
|
) -> str:
|
406
419
|
import cloudpickle as cp
|
407
420
|
import numpy as np
|
@@ -468,15 +481,15 @@ class IsolationForest(BaseTransformer):
|
|
468
481
|
api_calls=[Session.call],
|
469
482
|
custom_tags=dict([("autogen", True)]),
|
470
483
|
)
|
471
|
-
sproc_export_file_name =
|
472
|
-
|
484
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
485
|
+
session,
|
473
486
|
query,
|
474
487
|
stage_transform_file_name,
|
475
488
|
stage_result_file_name,
|
476
489
|
identifier.get_unescaped_names(self.input_cols),
|
477
490
|
identifier.get_unescaped_names(self.label_cols),
|
478
491
|
identifier.get_unescaped_names(self.sample_weight_col),
|
479
|
-
statement_params
|
492
|
+
statement_params,
|
480
493
|
)
|
481
494
|
|
482
495
|
if "|" in sproc_export_file_name:
|
@@ -486,7 +499,7 @@ class IsolationForest(BaseTransformer):
|
|
486
499
|
print("\n".join(fields[1:]))
|
487
500
|
|
488
501
|
session.file.get(
|
489
|
-
|
502
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
490
503
|
local_result_file_name,
|
491
504
|
statement_params=statement_params
|
492
505
|
)
|
@@ -532,7 +545,7 @@ class IsolationForest(BaseTransformer):
|
|
532
545
|
|
533
546
|
# Register vectorized UDF for batch inference
|
534
547
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
535
|
-
safe_id=self.id, method=inference_method)
|
548
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
536
549
|
|
537
550
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
538
551
|
# will try to pickle all of self which fails.
|
@@ -624,7 +637,7 @@ class IsolationForest(BaseTransformer):
|
|
624
637
|
return transformed_pandas_df.to_dict("records")
|
625
638
|
|
626
639
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
627
|
-
safe_id=self.id
|
640
|
+
safe_id=self._get_rand_id()
|
628
641
|
)
|
629
642
|
|
630
643
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -791,11 +804,18 @@ class IsolationForest(BaseTransformer):
|
|
791
804
|
Transformed dataset.
|
792
805
|
"""
|
793
806
|
if isinstance(dataset, DataFrame):
|
807
|
+
expected_type_inferred = ""
|
808
|
+
# when it is classifier, infer the datatype from label columns
|
809
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
810
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
811
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
812
|
+
)
|
813
|
+
|
794
814
|
output_df = self._batch_inference(
|
795
815
|
dataset=dataset,
|
796
816
|
inference_method="predict",
|
797
817
|
expected_output_cols_list=self.output_cols,
|
798
|
-
expected_output_cols_type=
|
818
|
+
expected_output_cols_type=expected_type_inferred,
|
799
819
|
)
|
800
820
|
elif isinstance(dataset, pd.DataFrame):
|
801
821
|
output_df = self._sklearn_inference(
|
@@ -866,10 +886,10 @@ class IsolationForest(BaseTransformer):
|
|
866
886
|
|
867
887
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
868
888
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
869
|
-
Returns
|
889
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
870
890
|
"""
|
871
891
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
872
|
-
return []
|
892
|
+
return [output_cols_prefix]
|
873
893
|
|
874
894
|
classes = self._sklearn_object.classes_
|
875
895
|
if isinstance(classes, numpy.ndarray):
|
@@ -1096,7 +1116,7 @@ class IsolationForest(BaseTransformer):
|
|
1096
1116
|
cp.dump(self._sklearn_object, local_score_file)
|
1097
1117
|
|
1098
1118
|
# Create temp stage to run score.
|
1099
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1119
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1100
1120
|
session = dataset._session
|
1101
1121
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1102
1122
|
SqlResultValidator(
|
@@ -1110,8 +1130,9 @@ class IsolationForest(BaseTransformer):
|
|
1110
1130
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1111
1131
|
).validate()
|
1112
1132
|
|
1113
|
-
|
1114
|
-
|
1133
|
+
# Use posixpath to construct stage paths
|
1134
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1135
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1115
1136
|
statement_params = telemetry.get_function_usage_statement_params(
|
1116
1137
|
project=_PROJECT,
|
1117
1138
|
subproject=_SUBPROJECT,
|
@@ -1137,6 +1158,7 @@ class IsolationForest(BaseTransformer):
|
|
1137
1158
|
replace=True,
|
1138
1159
|
session=session,
|
1139
1160
|
statement_params=statement_params,
|
1161
|
+
anonymous=True
|
1140
1162
|
)
|
1141
1163
|
def score_wrapper_sproc(
|
1142
1164
|
session: Session,
|
@@ -1144,7 +1166,8 @@ class IsolationForest(BaseTransformer):
|
|
1144
1166
|
stage_score_file_name: str,
|
1145
1167
|
input_cols: List[str],
|
1146
1168
|
label_cols: List[str],
|
1147
|
-
sample_weight_col: Optional[str]
|
1169
|
+
sample_weight_col: Optional[str],
|
1170
|
+
statement_params: Dict[str, str]
|
1148
1171
|
) -> float:
|
1149
1172
|
import cloudpickle as cp
|
1150
1173
|
import numpy as np
|
@@ -1194,14 +1217,14 @@ class IsolationForest(BaseTransformer):
|
|
1194
1217
|
api_calls=[Session.call],
|
1195
1218
|
custom_tags=dict([("autogen", True)]),
|
1196
1219
|
)
|
1197
|
-
score =
|
1198
|
-
|
1220
|
+
score = score_wrapper_sproc(
|
1221
|
+
session,
|
1199
1222
|
query,
|
1200
1223
|
stage_score_file_name,
|
1201
1224
|
identifier.get_unescaped_names(self.input_cols),
|
1202
1225
|
identifier.get_unescaped_names(self.label_cols),
|
1203
1226
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1204
|
-
statement_params
|
1227
|
+
statement_params,
|
1205
1228
|
)
|
1206
1229
|
|
1207
1230
|
cleanup_temp_files([local_score_file_name])
|
@@ -1219,18 +1242,20 @@ class IsolationForest(BaseTransformer):
|
|
1219
1242
|
if self._sklearn_object._estimator_type == 'classifier':
|
1220
1243
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1221
1244
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1222
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1245
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1246
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1223
1247
|
# For regressor, the type of predict is float64
|
1224
1248
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1225
1249
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1226
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1227
|
-
|
1250
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1251
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1228
1252
|
for prob_func in PROB_FUNCTIONS:
|
1229
1253
|
if hasattr(self, prob_func):
|
1230
1254
|
output_cols_prefix: str = f"{prob_func}_"
|
1231
1255
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1232
1256
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1233
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1257
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1258
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1234
1259
|
|
1235
1260
|
@property
|
1236
1261
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -344,7 +346,6 @@ class RandomForestClassifier(BaseTransformer):
|
|
344
346
|
sample_weight_col: Optional[str] = None,
|
345
347
|
) -> None:
|
346
348
|
super().__init__()
|
347
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
348
349
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
349
350
|
|
350
351
|
self._deps = list(deps)
|
@@ -381,6 +382,15 @@ class RandomForestClassifier(BaseTransformer):
|
|
381
382
|
self.set_drop_input_cols(drop_input_cols)
|
382
383
|
self.set_sample_weight_col(sample_weight_col)
|
383
384
|
|
385
|
+
def _get_rand_id(self) -> str:
|
386
|
+
"""
|
387
|
+
Generate random id to be used in sproc and stage names.
|
388
|
+
|
389
|
+
Returns:
|
390
|
+
Random id string usable in sproc, table, and stage names.
|
391
|
+
"""
|
392
|
+
return str(uuid4()).replace("-", "_").upper()
|
393
|
+
|
384
394
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
385
395
|
"""
|
386
396
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -459,7 +469,7 @@ class RandomForestClassifier(BaseTransformer):
|
|
459
469
|
cp.dump(self._sklearn_object, local_transform_file)
|
460
470
|
|
461
471
|
# Create temp stage to run fit.
|
462
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
472
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
463
473
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
464
474
|
SqlResultValidator(
|
465
475
|
session=session,
|
@@ -472,11 +482,12 @@ class RandomForestClassifier(BaseTransformer):
|
|
472
482
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
473
483
|
).validate()
|
474
484
|
|
475
|
-
|
485
|
+
# Use posixpath to construct stage paths
|
486
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
487
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
476
488
|
local_result_file_name = get_temp_file_path()
|
477
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
478
489
|
|
479
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
490
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
480
491
|
statement_params = telemetry.get_function_usage_statement_params(
|
481
492
|
project=_PROJECT,
|
482
493
|
subproject=_SUBPROJECT,
|
@@ -502,6 +513,7 @@ class RandomForestClassifier(BaseTransformer):
|
|
502
513
|
replace=True,
|
503
514
|
session=session,
|
504
515
|
statement_params=statement_params,
|
516
|
+
anonymous=True
|
505
517
|
)
|
506
518
|
def fit_wrapper_sproc(
|
507
519
|
session: Session,
|
@@ -510,7 +522,8 @@ class RandomForestClassifier(BaseTransformer):
|
|
510
522
|
stage_result_file_name: str,
|
511
523
|
input_cols: List[str],
|
512
524
|
label_cols: List[str],
|
513
|
-
sample_weight_col: Optional[str]
|
525
|
+
sample_weight_col: Optional[str],
|
526
|
+
statement_params: Dict[str, str]
|
514
527
|
) -> str:
|
515
528
|
import cloudpickle as cp
|
516
529
|
import numpy as np
|
@@ -577,15 +590,15 @@ class RandomForestClassifier(BaseTransformer):
|
|
577
590
|
api_calls=[Session.call],
|
578
591
|
custom_tags=dict([("autogen", True)]),
|
579
592
|
)
|
580
|
-
sproc_export_file_name =
|
581
|
-
|
593
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
594
|
+
session,
|
582
595
|
query,
|
583
596
|
stage_transform_file_name,
|
584
597
|
stage_result_file_name,
|
585
598
|
identifier.get_unescaped_names(self.input_cols),
|
586
599
|
identifier.get_unescaped_names(self.label_cols),
|
587
600
|
identifier.get_unescaped_names(self.sample_weight_col),
|
588
|
-
statement_params
|
601
|
+
statement_params,
|
589
602
|
)
|
590
603
|
|
591
604
|
if "|" in sproc_export_file_name:
|
@@ -595,7 +608,7 @@ class RandomForestClassifier(BaseTransformer):
|
|
595
608
|
print("\n".join(fields[1:]))
|
596
609
|
|
597
610
|
session.file.get(
|
598
|
-
|
611
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
599
612
|
local_result_file_name,
|
600
613
|
statement_params=statement_params
|
601
614
|
)
|
@@ -641,7 +654,7 @@ class RandomForestClassifier(BaseTransformer):
|
|
641
654
|
|
642
655
|
# Register vectorized UDF for batch inference
|
643
656
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
644
|
-
safe_id=self.id, method=inference_method)
|
657
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
645
658
|
|
646
659
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
647
660
|
# will try to pickle all of self which fails.
|
@@ -733,7 +746,7 @@ class RandomForestClassifier(BaseTransformer):
|
|
733
746
|
return transformed_pandas_df.to_dict("records")
|
734
747
|
|
735
748
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
736
|
-
safe_id=self.id
|
749
|
+
safe_id=self._get_rand_id()
|
737
750
|
)
|
738
751
|
|
739
752
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -900,11 +913,18 @@ class RandomForestClassifier(BaseTransformer):
|
|
900
913
|
Transformed dataset.
|
901
914
|
"""
|
902
915
|
if isinstance(dataset, DataFrame):
|
916
|
+
expected_type_inferred = ""
|
917
|
+
# when it is classifier, infer the datatype from label columns
|
918
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
919
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
920
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
921
|
+
)
|
922
|
+
|
903
923
|
output_df = self._batch_inference(
|
904
924
|
dataset=dataset,
|
905
925
|
inference_method="predict",
|
906
926
|
expected_output_cols_list=self.output_cols,
|
907
|
-
expected_output_cols_type=
|
927
|
+
expected_output_cols_type=expected_type_inferred,
|
908
928
|
)
|
909
929
|
elif isinstance(dataset, pd.DataFrame):
|
910
930
|
output_df = self._sklearn_inference(
|
@@ -975,10 +995,10 @@ class RandomForestClassifier(BaseTransformer):
|
|
975
995
|
|
976
996
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
977
997
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
978
|
-
Returns
|
998
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
979
999
|
"""
|
980
1000
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
981
|
-
return []
|
1001
|
+
return [output_cols_prefix]
|
982
1002
|
|
983
1003
|
classes = self._sklearn_object.classes_
|
984
1004
|
if isinstance(classes, numpy.ndarray):
|
@@ -1207,7 +1227,7 @@ class RandomForestClassifier(BaseTransformer):
|
|
1207
1227
|
cp.dump(self._sklearn_object, local_score_file)
|
1208
1228
|
|
1209
1229
|
# Create temp stage to run score.
|
1210
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1230
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1211
1231
|
session = dataset._session
|
1212
1232
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1213
1233
|
SqlResultValidator(
|
@@ -1221,8 +1241,9 @@ class RandomForestClassifier(BaseTransformer):
|
|
1221
1241
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1222
1242
|
).validate()
|
1223
1243
|
|
1224
|
-
|
1225
|
-
|
1244
|
+
# Use posixpath to construct stage paths
|
1245
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1246
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1226
1247
|
statement_params = telemetry.get_function_usage_statement_params(
|
1227
1248
|
project=_PROJECT,
|
1228
1249
|
subproject=_SUBPROJECT,
|
@@ -1248,6 +1269,7 @@ class RandomForestClassifier(BaseTransformer):
|
|
1248
1269
|
replace=True,
|
1249
1270
|
session=session,
|
1250
1271
|
statement_params=statement_params,
|
1272
|
+
anonymous=True
|
1251
1273
|
)
|
1252
1274
|
def score_wrapper_sproc(
|
1253
1275
|
session: Session,
|
@@ -1255,7 +1277,8 @@ class RandomForestClassifier(BaseTransformer):
|
|
1255
1277
|
stage_score_file_name: str,
|
1256
1278
|
input_cols: List[str],
|
1257
1279
|
label_cols: List[str],
|
1258
|
-
sample_weight_col: Optional[str]
|
1280
|
+
sample_weight_col: Optional[str],
|
1281
|
+
statement_params: Dict[str, str]
|
1259
1282
|
) -> float:
|
1260
1283
|
import cloudpickle as cp
|
1261
1284
|
import numpy as np
|
@@ -1305,14 +1328,14 @@ class RandomForestClassifier(BaseTransformer):
|
|
1305
1328
|
api_calls=[Session.call],
|
1306
1329
|
custom_tags=dict([("autogen", True)]),
|
1307
1330
|
)
|
1308
|
-
score =
|
1309
|
-
|
1331
|
+
score = score_wrapper_sproc(
|
1332
|
+
session,
|
1310
1333
|
query,
|
1311
1334
|
stage_score_file_name,
|
1312
1335
|
identifier.get_unescaped_names(self.input_cols),
|
1313
1336
|
identifier.get_unescaped_names(self.label_cols),
|
1314
1337
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1315
|
-
statement_params
|
1338
|
+
statement_params,
|
1316
1339
|
)
|
1317
1340
|
|
1318
1341
|
cleanup_temp_files([local_score_file_name])
|
@@ -1330,18 +1353,20 @@ class RandomForestClassifier(BaseTransformer):
|
|
1330
1353
|
if self._sklearn_object._estimator_type == 'classifier':
|
1331
1354
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1332
1355
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1333
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1356
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1357
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1334
1358
|
# For regressor, the type of predict is float64
|
1335
1359
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1336
1360
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1337
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1338
|
-
|
1361
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1362
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1339
1363
|
for prob_func in PROB_FUNCTIONS:
|
1340
1364
|
if hasattr(self, prob_func):
|
1341
1365
|
output_cols_prefix: str = f"{prob_func}_"
|
1342
1366
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1343
1367
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1344
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1368
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1369
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1345
1370
|
|
1346
1371
|
@property
|
1347
1372
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|