snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -299,7 +301,6 @@ class OPTICS(BaseTransformer):
|
|
299
301
|
sample_weight_col: Optional[str] = None,
|
300
302
|
) -> None:
|
301
303
|
super().__init__()
|
302
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
303
304
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
304
305
|
|
305
306
|
self._deps = list(deps)
|
@@ -332,6 +333,15 @@ class OPTICS(BaseTransformer):
|
|
332
333
|
self.set_drop_input_cols(drop_input_cols)
|
333
334
|
self.set_sample_weight_col(sample_weight_col)
|
334
335
|
|
336
|
+
def _get_rand_id(self) -> str:
|
337
|
+
"""
|
338
|
+
Generate random id to be used in sproc and stage names.
|
339
|
+
|
340
|
+
Returns:
|
341
|
+
Random id string usable in sproc, table, and stage names.
|
342
|
+
"""
|
343
|
+
return str(uuid4()).replace("-", "_").upper()
|
344
|
+
|
335
345
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
336
346
|
"""
|
337
347
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -410,7 +420,7 @@ class OPTICS(BaseTransformer):
|
|
410
420
|
cp.dump(self._sklearn_object, local_transform_file)
|
411
421
|
|
412
422
|
# Create temp stage to run fit.
|
413
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
423
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
414
424
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
415
425
|
SqlResultValidator(
|
416
426
|
session=session,
|
@@ -423,11 +433,12 @@ class OPTICS(BaseTransformer):
|
|
423
433
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
424
434
|
).validate()
|
425
435
|
|
426
|
-
|
436
|
+
# Use posixpath to construct stage paths
|
437
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
438
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
427
439
|
local_result_file_name = get_temp_file_path()
|
428
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
429
440
|
|
430
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
441
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
431
442
|
statement_params = telemetry.get_function_usage_statement_params(
|
432
443
|
project=_PROJECT,
|
433
444
|
subproject=_SUBPROJECT,
|
@@ -453,6 +464,7 @@ class OPTICS(BaseTransformer):
|
|
453
464
|
replace=True,
|
454
465
|
session=session,
|
455
466
|
statement_params=statement_params,
|
467
|
+
anonymous=True
|
456
468
|
)
|
457
469
|
def fit_wrapper_sproc(
|
458
470
|
session: Session,
|
@@ -461,7 +473,8 @@ class OPTICS(BaseTransformer):
|
|
461
473
|
stage_result_file_name: str,
|
462
474
|
input_cols: List[str],
|
463
475
|
label_cols: List[str],
|
464
|
-
sample_weight_col: Optional[str]
|
476
|
+
sample_weight_col: Optional[str],
|
477
|
+
statement_params: Dict[str, str]
|
465
478
|
) -> str:
|
466
479
|
import cloudpickle as cp
|
467
480
|
import numpy as np
|
@@ -528,15 +541,15 @@ class OPTICS(BaseTransformer):
|
|
528
541
|
api_calls=[Session.call],
|
529
542
|
custom_tags=dict([("autogen", True)]),
|
530
543
|
)
|
531
|
-
sproc_export_file_name =
|
532
|
-
|
544
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
545
|
+
session,
|
533
546
|
query,
|
534
547
|
stage_transform_file_name,
|
535
548
|
stage_result_file_name,
|
536
549
|
identifier.get_unescaped_names(self.input_cols),
|
537
550
|
identifier.get_unescaped_names(self.label_cols),
|
538
551
|
identifier.get_unescaped_names(self.sample_weight_col),
|
539
|
-
statement_params
|
552
|
+
statement_params,
|
540
553
|
)
|
541
554
|
|
542
555
|
if "|" in sproc_export_file_name:
|
@@ -546,7 +559,7 @@ class OPTICS(BaseTransformer):
|
|
546
559
|
print("\n".join(fields[1:]))
|
547
560
|
|
548
561
|
session.file.get(
|
549
|
-
|
562
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
550
563
|
local_result_file_name,
|
551
564
|
statement_params=statement_params
|
552
565
|
)
|
@@ -592,7 +605,7 @@ class OPTICS(BaseTransformer):
|
|
592
605
|
|
593
606
|
# Register vectorized UDF for batch inference
|
594
607
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
595
|
-
safe_id=self.
|
608
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
596
609
|
|
597
610
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
598
611
|
# will try to pickle all of self which fails.
|
@@ -684,7 +697,7 @@ class OPTICS(BaseTransformer):
|
|
684
697
|
return transformed_pandas_df.to_dict("records")
|
685
698
|
|
686
699
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
687
|
-
safe_id=self.
|
700
|
+
safe_id=self._get_rand_id()
|
688
701
|
)
|
689
702
|
|
690
703
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -849,11 +862,18 @@ class OPTICS(BaseTransformer):
|
|
849
862
|
Transformed dataset.
|
850
863
|
"""
|
851
864
|
if isinstance(dataset, DataFrame):
|
865
|
+
expected_type_inferred = ""
|
866
|
+
# when it is classifier, infer the datatype from label columns
|
867
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
868
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
869
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
870
|
+
)
|
871
|
+
|
852
872
|
output_df = self._batch_inference(
|
853
873
|
dataset=dataset,
|
854
874
|
inference_method="predict",
|
855
875
|
expected_output_cols_list=self.output_cols,
|
856
|
-
expected_output_cols_type=
|
876
|
+
expected_output_cols_type=expected_type_inferred,
|
857
877
|
)
|
858
878
|
elif isinstance(dataset, pd.DataFrame):
|
859
879
|
output_df = self._sklearn_inference(
|
@@ -924,10 +944,10 @@ class OPTICS(BaseTransformer):
|
|
924
944
|
|
925
945
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
926
946
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
927
|
-
Returns
|
947
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
928
948
|
"""
|
929
949
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
930
|
-
return []
|
950
|
+
return [output_cols_prefix]
|
931
951
|
|
932
952
|
classes = self._sklearn_object.classes_
|
933
953
|
if isinstance(classes, numpy.ndarray):
|
@@ -1152,7 +1172,7 @@ class OPTICS(BaseTransformer):
|
|
1152
1172
|
cp.dump(self._sklearn_object, local_score_file)
|
1153
1173
|
|
1154
1174
|
# Create temp stage to run score.
|
1155
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1175
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1156
1176
|
session = dataset._session
|
1157
1177
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1158
1178
|
SqlResultValidator(
|
@@ -1166,8 +1186,9 @@ class OPTICS(BaseTransformer):
|
|
1166
1186
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1167
1187
|
).validate()
|
1168
1188
|
|
1169
|
-
|
1170
|
-
|
1189
|
+
# Use posixpath to construct stage paths
|
1190
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1191
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1171
1192
|
statement_params = telemetry.get_function_usage_statement_params(
|
1172
1193
|
project=_PROJECT,
|
1173
1194
|
subproject=_SUBPROJECT,
|
@@ -1193,6 +1214,7 @@ class OPTICS(BaseTransformer):
|
|
1193
1214
|
replace=True,
|
1194
1215
|
session=session,
|
1195
1216
|
statement_params=statement_params,
|
1217
|
+
anonymous=True
|
1196
1218
|
)
|
1197
1219
|
def score_wrapper_sproc(
|
1198
1220
|
session: Session,
|
@@ -1200,7 +1222,8 @@ class OPTICS(BaseTransformer):
|
|
1200
1222
|
stage_score_file_name: str,
|
1201
1223
|
input_cols: List[str],
|
1202
1224
|
label_cols: List[str],
|
1203
|
-
sample_weight_col: Optional[str]
|
1225
|
+
sample_weight_col: Optional[str],
|
1226
|
+
statement_params: Dict[str, str]
|
1204
1227
|
) -> float:
|
1205
1228
|
import cloudpickle as cp
|
1206
1229
|
import numpy as np
|
@@ -1250,14 +1273,14 @@ class OPTICS(BaseTransformer):
|
|
1250
1273
|
api_calls=[Session.call],
|
1251
1274
|
custom_tags=dict([("autogen", True)]),
|
1252
1275
|
)
|
1253
|
-
score =
|
1254
|
-
|
1276
|
+
score = score_wrapper_sproc(
|
1277
|
+
session,
|
1255
1278
|
query,
|
1256
1279
|
stage_score_file_name,
|
1257
1280
|
identifier.get_unescaped_names(self.input_cols),
|
1258
1281
|
identifier.get_unescaped_names(self.label_cols),
|
1259
1282
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1260
|
-
statement_params
|
1283
|
+
statement_params,
|
1261
1284
|
)
|
1262
1285
|
|
1263
1286
|
cleanup_temp_files([local_score_file_name])
|
@@ -1275,18 +1298,20 @@ class OPTICS(BaseTransformer):
|
|
1275
1298
|
if self._sklearn_object._estimator_type == 'classifier':
|
1276
1299
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1277
1300
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1278
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1301
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1302
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1279
1303
|
# For regressor, the type of predict is float64
|
1280
1304
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1281
1305
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1282
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1283
|
-
|
1306
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1307
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1284
1308
|
for prob_func in PROB_FUNCTIONS:
|
1285
1309
|
if hasattr(self, prob_func):
|
1286
1310
|
output_cols_prefix: str = f"{prob_func}_"
|
1287
1311
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1288
1312
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1289
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1313
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1314
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1290
1315
|
|
1291
1316
|
@property
|
1292
1317
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -241,7 +243,6 @@ class SpectralBiclustering(BaseTransformer):
|
|
241
243
|
sample_weight_col: Optional[str] = None,
|
242
244
|
) -> None:
|
243
245
|
super().__init__()
|
244
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
245
246
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
246
247
|
|
247
248
|
self._deps = list(deps)
|
@@ -270,6 +271,15 @@ class SpectralBiclustering(BaseTransformer):
|
|
270
271
|
self.set_drop_input_cols(drop_input_cols)
|
271
272
|
self.set_sample_weight_col(sample_weight_col)
|
272
273
|
|
274
|
+
def _get_rand_id(self) -> str:
|
275
|
+
"""
|
276
|
+
Generate random id to be used in sproc and stage names.
|
277
|
+
|
278
|
+
Returns:
|
279
|
+
Random id string usable in sproc, table, and stage names.
|
280
|
+
"""
|
281
|
+
return str(uuid4()).replace("-", "_").upper()
|
282
|
+
|
273
283
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
274
284
|
"""
|
275
285
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -348,7 +358,7 @@ class SpectralBiclustering(BaseTransformer):
|
|
348
358
|
cp.dump(self._sklearn_object, local_transform_file)
|
349
359
|
|
350
360
|
# Create temp stage to run fit.
|
351
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
361
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
352
362
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
353
363
|
SqlResultValidator(
|
354
364
|
session=session,
|
@@ -361,11 +371,12 @@ class SpectralBiclustering(BaseTransformer):
|
|
361
371
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
362
372
|
).validate()
|
363
373
|
|
364
|
-
|
374
|
+
# Use posixpath to construct stage paths
|
375
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
376
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
365
377
|
local_result_file_name = get_temp_file_path()
|
366
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
367
378
|
|
368
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
379
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
369
380
|
statement_params = telemetry.get_function_usage_statement_params(
|
370
381
|
project=_PROJECT,
|
371
382
|
subproject=_SUBPROJECT,
|
@@ -391,6 +402,7 @@ class SpectralBiclustering(BaseTransformer):
|
|
391
402
|
replace=True,
|
392
403
|
session=session,
|
393
404
|
statement_params=statement_params,
|
405
|
+
anonymous=True
|
394
406
|
)
|
395
407
|
def fit_wrapper_sproc(
|
396
408
|
session: Session,
|
@@ -399,7 +411,8 @@ class SpectralBiclustering(BaseTransformer):
|
|
399
411
|
stage_result_file_name: str,
|
400
412
|
input_cols: List[str],
|
401
413
|
label_cols: List[str],
|
402
|
-
sample_weight_col: Optional[str]
|
414
|
+
sample_weight_col: Optional[str],
|
415
|
+
statement_params: Dict[str, str]
|
403
416
|
) -> str:
|
404
417
|
import cloudpickle as cp
|
405
418
|
import numpy as np
|
@@ -466,15 +479,15 @@ class SpectralBiclustering(BaseTransformer):
|
|
466
479
|
api_calls=[Session.call],
|
467
480
|
custom_tags=dict([("autogen", True)]),
|
468
481
|
)
|
469
|
-
sproc_export_file_name =
|
470
|
-
|
482
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
483
|
+
session,
|
471
484
|
query,
|
472
485
|
stage_transform_file_name,
|
473
486
|
stage_result_file_name,
|
474
487
|
identifier.get_unescaped_names(self.input_cols),
|
475
488
|
identifier.get_unescaped_names(self.label_cols),
|
476
489
|
identifier.get_unescaped_names(self.sample_weight_col),
|
477
|
-
statement_params
|
490
|
+
statement_params,
|
478
491
|
)
|
479
492
|
|
480
493
|
if "|" in sproc_export_file_name:
|
@@ -484,7 +497,7 @@ class SpectralBiclustering(BaseTransformer):
|
|
484
497
|
print("\n".join(fields[1:]))
|
485
498
|
|
486
499
|
session.file.get(
|
487
|
-
|
500
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
488
501
|
local_result_file_name,
|
489
502
|
statement_params=statement_params
|
490
503
|
)
|
@@ -530,7 +543,7 @@ class SpectralBiclustering(BaseTransformer):
|
|
530
543
|
|
531
544
|
# Register vectorized UDF for batch inference
|
532
545
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
533
|
-
safe_id=self.
|
546
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
534
547
|
|
535
548
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
536
549
|
# will try to pickle all of self which fails.
|
@@ -622,7 +635,7 @@ class SpectralBiclustering(BaseTransformer):
|
|
622
635
|
return transformed_pandas_df.to_dict("records")
|
623
636
|
|
624
637
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
625
|
-
safe_id=self.
|
638
|
+
safe_id=self._get_rand_id()
|
626
639
|
)
|
627
640
|
|
628
641
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -787,11 +800,18 @@ class SpectralBiclustering(BaseTransformer):
|
|
787
800
|
Transformed dataset.
|
788
801
|
"""
|
789
802
|
if isinstance(dataset, DataFrame):
|
803
|
+
expected_type_inferred = ""
|
804
|
+
# when it is classifier, infer the datatype from label columns
|
805
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
806
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
807
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
808
|
+
)
|
809
|
+
|
790
810
|
output_df = self._batch_inference(
|
791
811
|
dataset=dataset,
|
792
812
|
inference_method="predict",
|
793
813
|
expected_output_cols_list=self.output_cols,
|
794
|
-
expected_output_cols_type=
|
814
|
+
expected_output_cols_type=expected_type_inferred,
|
795
815
|
)
|
796
816
|
elif isinstance(dataset, pd.DataFrame):
|
797
817
|
output_df = self._sklearn_inference(
|
@@ -862,10 +882,10 @@ class SpectralBiclustering(BaseTransformer):
|
|
862
882
|
|
863
883
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
864
884
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
865
|
-
Returns
|
885
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
866
886
|
"""
|
867
887
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
868
|
-
return []
|
888
|
+
return [output_cols_prefix]
|
869
889
|
|
870
890
|
classes = self._sklearn_object.classes_
|
871
891
|
if isinstance(classes, numpy.ndarray):
|
@@ -1090,7 +1110,7 @@ class SpectralBiclustering(BaseTransformer):
|
|
1090
1110
|
cp.dump(self._sklearn_object, local_score_file)
|
1091
1111
|
|
1092
1112
|
# Create temp stage to run score.
|
1093
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1113
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1094
1114
|
session = dataset._session
|
1095
1115
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1096
1116
|
SqlResultValidator(
|
@@ -1104,8 +1124,9 @@ class SpectralBiclustering(BaseTransformer):
|
|
1104
1124
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1105
1125
|
).validate()
|
1106
1126
|
|
1107
|
-
|
1108
|
-
|
1127
|
+
# Use posixpath to construct stage paths
|
1128
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1129
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1109
1130
|
statement_params = telemetry.get_function_usage_statement_params(
|
1110
1131
|
project=_PROJECT,
|
1111
1132
|
subproject=_SUBPROJECT,
|
@@ -1131,6 +1152,7 @@ class SpectralBiclustering(BaseTransformer):
|
|
1131
1152
|
replace=True,
|
1132
1153
|
session=session,
|
1133
1154
|
statement_params=statement_params,
|
1155
|
+
anonymous=True
|
1134
1156
|
)
|
1135
1157
|
def score_wrapper_sproc(
|
1136
1158
|
session: Session,
|
@@ -1138,7 +1160,8 @@ class SpectralBiclustering(BaseTransformer):
|
|
1138
1160
|
stage_score_file_name: str,
|
1139
1161
|
input_cols: List[str],
|
1140
1162
|
label_cols: List[str],
|
1141
|
-
sample_weight_col: Optional[str]
|
1163
|
+
sample_weight_col: Optional[str],
|
1164
|
+
statement_params: Dict[str, str]
|
1142
1165
|
) -> float:
|
1143
1166
|
import cloudpickle as cp
|
1144
1167
|
import numpy as np
|
@@ -1188,14 +1211,14 @@ class SpectralBiclustering(BaseTransformer):
|
|
1188
1211
|
api_calls=[Session.call],
|
1189
1212
|
custom_tags=dict([("autogen", True)]),
|
1190
1213
|
)
|
1191
|
-
score =
|
1192
|
-
|
1214
|
+
score = score_wrapper_sproc(
|
1215
|
+
session,
|
1193
1216
|
query,
|
1194
1217
|
stage_score_file_name,
|
1195
1218
|
identifier.get_unescaped_names(self.input_cols),
|
1196
1219
|
identifier.get_unescaped_names(self.label_cols),
|
1197
1220
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1198
|
-
statement_params
|
1221
|
+
statement_params,
|
1199
1222
|
)
|
1200
1223
|
|
1201
1224
|
cleanup_temp_files([local_score_file_name])
|
@@ -1213,18 +1236,20 @@ class SpectralBiclustering(BaseTransformer):
|
|
1213
1236
|
if self._sklearn_object._estimator_type == 'classifier':
|
1214
1237
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1215
1238
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1216
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1239
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1240
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1217
1241
|
# For regressor, the type of predict is float64
|
1218
1242
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1219
1243
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1220
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1221
|
-
|
1244
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1245
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1222
1246
|
for prob_func in PROB_FUNCTIONS:
|
1223
1247
|
if hasattr(self, prob_func):
|
1224
1248
|
output_cols_prefix: str = f"{prob_func}_"
|
1225
1249
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1226
1250
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1227
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1251
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1252
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1228
1253
|
|
1229
1254
|
@property
|
1230
1255
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|