snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/neighbors/kernel_density.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -233,7 +235,6 @@ class KernelDensity(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -261,6 +262,15 @@ class KernelDensity(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -339,7 +349,7 @@ class KernelDensity(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -352,11 +362,12 @@ class KernelDensity(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -382,6 +393,7 @@ class KernelDensity(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -390,7 +402,8 @@ class KernelDensity(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -457,15 +470,15 @@ class KernelDensity(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -475,7 +488,7 @@ class KernelDensity(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
        )
@@ -521,7 +534,7 @@ class KernelDensity(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -613,7 +626,7 @@ class KernelDensity(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -778,11 +791,18 @@ class KernelDensity(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -853,10 +873,10 @@ class KernelDensity(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1081,7 +1101,7 @@ class KernelDensity(BaseTransformer):
            cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1095,8 +1115,9 @@ class KernelDensity(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1122,6 +1143,7 @@ class KernelDensity(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1129,7 +1151,8 @@ class KernelDensity(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1179,14 +1202,14 @@ class KernelDensity(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1204,18 +1227,20 @@ class KernelDensity(BaseTransformer):
            if self._sklearn_object._estimator_type == 'classifier':
                outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
                outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-                self._model_signature_dict["predict"] = ModelSignature(inputs,
+                self._model_signature_dict["predict"] = ModelSignature(inputs,
+                    ([] if self._drop_input_cols else inputs) + outputs)
            # For regressor, the type of predict is float64
            elif self._sklearn_object._estimator_type == 'regressor':
                outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-                self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+                self._model_signature_dict["predict"] = ModelSignature(inputs,
+                    ([] if self._drop_input_cols else inputs) + outputs)
            for prob_func in PROB_FUNCTIONS:
                if hasattr(self, prob_func):
                    output_cols_prefix: str = f"{prob_func}_"
                    output_column_names = self._get_output_column_names(output_cols_prefix)
                    outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                    self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                    self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                        ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
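A change repeated throughout the generated estimator files is that stage paths are now built with posixpath.join instead of os.path.join (see the "# Use posixpath to construct stage paths" lines above). The short sketch below, using hypothetical stage and file names, illustrates the portability point: Snowflake stage paths always use forward slashes, while os.path.join follows the client operating system's separator and would emit backslashes on Windows.

import ntpath      # what os.path resolves to on a Windows client
import posixpath   # always joins with forward slashes

# Hypothetical names, for illustration only.
stage_name = "SNOWML_TRANSFORM_ABC123"
file_name = "model.pkl"

print(ntpath.join(stage_name, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl (not a valid stage path)
print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl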
snowflake/ml/modeling/neighbors/local_outlier_factor.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -261,7 +263,6 @@ class LocalOutlierFactor(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -289,6 +290,15 @@ class LocalOutlierFactor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -367,7 +377,7 @@ class LocalOutlierFactor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -380,11 +390,12 @@ class LocalOutlierFactor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -410,6 +421,7 @@ class LocalOutlierFactor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -418,7 +430,8 @@ class LocalOutlierFactor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -485,15 +498,15 @@ class LocalOutlierFactor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -503,7 +516,7 @@ class LocalOutlierFactor(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
        )
@@ -549,7 +562,7 @@ class LocalOutlierFactor(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -641,7 +654,7 @@ class LocalOutlierFactor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -808,11 +821,18 @@ class LocalOutlierFactor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -883,10 +903,10 @@ class LocalOutlierFactor(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1113,7 +1133,7 @@ class LocalOutlierFactor(BaseTransformer):
            cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1127,8 +1147,9 @@ class LocalOutlierFactor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1154,6 +1175,7 @@ class LocalOutlierFactor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1161,7 +1183,8 @@ class LocalOutlierFactor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1211,14 +1234,14 @@ class LocalOutlierFactor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1236,18 +1259,20 @@ class LocalOutlierFactor(BaseTransformer):
            if self._sklearn_object._estimator_type == 'classifier':
                outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
                outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-                self._model_signature_dict["predict"] = ModelSignature(inputs,
+                self._model_signature_dict["predict"] = ModelSignature(inputs,
+                    ([] if self._drop_input_cols else inputs) + outputs)
            # For regressor, the type of predict is float64
            elif self._sklearn_object._estimator_type == 'regressor':
                outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-                self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+                self._model_signature_dict["predict"] = ModelSignature(inputs,
+                    ([] if self._drop_input_cols else inputs) + outputs)
            for prob_func in PROB_FUNCTIONS:
                if hasattr(self, prob_func):
                    output_cols_prefix: str = f"{prob_func}_"
                    output_column_names = self._get_output_column_names(output_cols_prefix)
                    outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                    self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                    self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                        ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
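Both files above also drop the per-instance self.id attribute and instead derive every temporary stage, sproc, and UDF name from the new _get_rand_id() helper. A minimal standalone sketch of that pattern; the helper body is taken from the diff, while the surrounding usage is simplified for illustration:

from uuid import uuid4


def _get_rand_id() -> str:
    """Generate a random id usable in sproc, table, and stage names."""
    return str(uuid4()).replace("-", "_").upper()


# Each call produces a distinct identifier; the previous code reused a single
# id generated once in __init__ for all of these names.
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id())
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=_get_rand_id())
print(transform_stage_name)
print(score_stage_name)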