snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/cluster/spectral_clustering.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -294,7 +296,6 @@ class SpectralClustering(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -328,6 +329,15 @@ class SpectralClustering(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -406,7 +416,7 @@ class SpectralClustering(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -419,11 +429,12 @@ class SpectralClustering(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -449,6 +460,7 @@ class SpectralClustering(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -457,7 +469,8 @@ class SpectralClustering(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -524,15 +537,15 @@ class SpectralClustering(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -542,7 +555,7 @@ class SpectralClustering(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -588,7 +601,7 @@ class SpectralClustering(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -680,7 +693,7 @@ class SpectralClustering(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -845,11 +858,18 @@ class SpectralClustering(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -920,10 +940,10 @@ class SpectralClustering(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1148,7 +1168,7 @@ class SpectralClustering(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1162,8 +1182,9 @@ class SpectralClustering(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1189,6 +1210,7 @@ class SpectralClustering(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1196,7 +1218,8 @@ class SpectralClustering(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1246,14 +1269,14 @@ class SpectralClustering(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1271,18 +1294,20 @@ class SpectralClustering(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                    ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
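
Several hunks above replace os.path.join with posixpath.join when building stage paths. A minimal sketch of why this matters, using made-up stage and file names rather than values from the package: Snowflake stage paths always use forward slashes, while os.path.join follows the client's OS separator, so posixpath keeps the path valid even when the client runs on Windows.

    # Illustrative only: the stage and file names below are hypothetical.
    import ntpath      # behaves like os.path on a Windows client
    import posixpath

    stage_name = "SNOWML_TRANSFORM_ABC123"
    file_name = "model.pkl"

    print(ntpath.join(stage_name, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl  (not a valid stage path)
    print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl  (valid on any client OS)
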
snowflake/ml/modeling/cluster/spectral_coclustering.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -223,7 +225,6 @@ class SpectralCoclustering(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -249,6 +250,15 @@ class SpectralCoclustering(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -327,7 +337,7 @@ class SpectralCoclustering(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -340,11 +350,12 @@ class SpectralCoclustering(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -370,6 +381,7 @@ class SpectralCoclustering(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -378,7 +390,8 @@ class SpectralCoclustering(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -445,15 +458,15 @@ class SpectralCoclustering(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -463,7 +476,7 @@ class SpectralCoclustering(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -509,7 +522,7 @@ class SpectralCoclustering(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -601,7 +614,7 @@ class SpectralCoclustering(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -766,11 +779,18 @@ class SpectralCoclustering(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -841,10 +861,10 @@ class SpectralCoclustering(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1069,7 +1089,7 @@ class SpectralCoclustering(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1083,8 +1103,9 @@ class SpectralCoclustering(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1110,6 +1131,7 @@ class SpectralCoclustering(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1117,7 +1139,8 @@ class SpectralCoclustering(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1167,14 +1190,14 @@ class SpectralCoclustering(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1192,18 +1215,20 @@ class SpectralCoclustering(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                    ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
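
Both estimator diffs also drop the per-instance self.id set in __init__ in favor of the on-demand _get_rand_id() helper shown above. A standalone sketch of the same identifier scheme, with a hypothetical stage name for illustration: the UUID's hyphens are replaced with underscores because unquoted Snowflake identifiers only allow letters, digits, underscores, and dollar signs.

    # Same scheme as _get_rand_id() in the diffs above; the stage name here is hypothetical.
    from uuid import uuid4

    def get_rand_id() -> str:
        # uuid4 gives uniqueness; hyphens are not legal in unquoted Snowflake identifiers.
        return str(uuid4()).replace("-", "_").upper()

    transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=get_rand_id())
    print(transform_stage_name)  # e.g. SNOWML_TRANSFORM_<uuid with underscores, upper-cased>
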