snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -245,7 +247,6 @@ class SpectralEmbedding(BaseTransformer):
|
|
245
247
|
sample_weight_col: Optional[str] = None,
|
246
248
|
) -> None:
|
247
249
|
super().__init__()
|
248
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
249
250
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
250
251
|
|
251
252
|
self._deps = list(deps)
|
@@ -272,6 +273,15 @@ class SpectralEmbedding(BaseTransformer):
|
|
272
273
|
self.set_drop_input_cols(drop_input_cols)
|
273
274
|
self.set_sample_weight_col(sample_weight_col)
|
274
275
|
|
276
|
+
def _get_rand_id(self) -> str:
|
277
|
+
"""
|
278
|
+
Generate random id to be used in sproc and stage names.
|
279
|
+
|
280
|
+
Returns:
|
281
|
+
Random id string usable in sproc, table, and stage names.
|
282
|
+
"""
|
283
|
+
return str(uuid4()).replace("-", "_").upper()
|
284
|
+
|
275
285
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
276
286
|
"""
|
277
287
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -350,7 +360,7 @@ class SpectralEmbedding(BaseTransformer):
|
|
350
360
|
cp.dump(self._sklearn_object, local_transform_file)
|
351
361
|
|
352
362
|
# Create temp stage to run fit.
|
353
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
363
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
354
364
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
355
365
|
SqlResultValidator(
|
356
366
|
session=session,
|
@@ -363,11 +373,12 @@ class SpectralEmbedding(BaseTransformer):
|
|
363
373
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
364
374
|
).validate()
|
365
375
|
|
366
|
-
|
376
|
+
# Use posixpath to construct stage paths
|
377
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
378
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
367
379
|
local_result_file_name = get_temp_file_path()
|
368
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
369
380
|
|
370
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
381
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
371
382
|
statement_params = telemetry.get_function_usage_statement_params(
|
372
383
|
project=_PROJECT,
|
373
384
|
subproject=_SUBPROJECT,
|
@@ -393,6 +404,7 @@ class SpectralEmbedding(BaseTransformer):
|
|
393
404
|
replace=True,
|
394
405
|
session=session,
|
395
406
|
statement_params=statement_params,
|
407
|
+
anonymous=True
|
396
408
|
)
|
397
409
|
def fit_wrapper_sproc(
|
398
410
|
session: Session,
|
@@ -401,7 +413,8 @@ class SpectralEmbedding(BaseTransformer):
|
|
401
413
|
stage_result_file_name: str,
|
402
414
|
input_cols: List[str],
|
403
415
|
label_cols: List[str],
|
404
|
-
sample_weight_col: Optional[str]
|
416
|
+
sample_weight_col: Optional[str],
|
417
|
+
statement_params: Dict[str, str]
|
405
418
|
) -> str:
|
406
419
|
import cloudpickle as cp
|
407
420
|
import numpy as np
|
@@ -468,15 +481,15 @@ class SpectralEmbedding(BaseTransformer):
|
|
468
481
|
api_calls=[Session.call],
|
469
482
|
custom_tags=dict([("autogen", True)]),
|
470
483
|
)
|
471
|
-
sproc_export_file_name =
|
472
|
-
|
484
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
485
|
+
session,
|
473
486
|
query,
|
474
487
|
stage_transform_file_name,
|
475
488
|
stage_result_file_name,
|
476
489
|
identifier.get_unescaped_names(self.input_cols),
|
477
490
|
identifier.get_unescaped_names(self.label_cols),
|
478
491
|
identifier.get_unescaped_names(self.sample_weight_col),
|
479
|
-
statement_params
|
492
|
+
statement_params,
|
480
493
|
)
|
481
494
|
|
482
495
|
if "|" in sproc_export_file_name:
|
@@ -486,7 +499,7 @@ class SpectralEmbedding(BaseTransformer):
|
|
486
499
|
print("\n".join(fields[1:]))
|
487
500
|
|
488
501
|
session.file.get(
|
489
|
-
|
502
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
490
503
|
local_result_file_name,
|
491
504
|
statement_params=statement_params
|
492
505
|
)
|
@@ -532,7 +545,7 @@ class SpectralEmbedding(BaseTransformer):
|
|
532
545
|
|
533
546
|
# Register vectorized UDF for batch inference
|
534
547
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
535
|
-
safe_id=self.
|
548
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
536
549
|
|
537
550
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
538
551
|
# will try to pickle all of self which fails.
|
@@ -624,7 +637,7 @@ class SpectralEmbedding(BaseTransformer):
|
|
624
637
|
return transformed_pandas_df.to_dict("records")
|
625
638
|
|
626
639
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
627
|
-
safe_id=self.
|
640
|
+
safe_id=self._get_rand_id()
|
628
641
|
)
|
629
642
|
|
630
643
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -789,11 +802,18 @@ class SpectralEmbedding(BaseTransformer):
|
|
789
802
|
Transformed dataset.
|
790
803
|
"""
|
791
804
|
if isinstance(dataset, DataFrame):
|
805
|
+
expected_type_inferred = ""
|
806
|
+
# when it is classifier, infer the datatype from label columns
|
807
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
808
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
809
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
810
|
+
)
|
811
|
+
|
792
812
|
output_df = self._batch_inference(
|
793
813
|
dataset=dataset,
|
794
814
|
inference_method="predict",
|
795
815
|
expected_output_cols_list=self.output_cols,
|
796
|
-
expected_output_cols_type=
|
816
|
+
expected_output_cols_type=expected_type_inferred,
|
797
817
|
)
|
798
818
|
elif isinstance(dataset, pd.DataFrame):
|
799
819
|
output_df = self._sklearn_inference(
|
@@ -864,10 +884,10 @@ class SpectralEmbedding(BaseTransformer):
|
|
864
884
|
|
865
885
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
866
886
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
867
|
-
Returns
|
887
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
868
888
|
"""
|
869
889
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
870
|
-
return []
|
890
|
+
return [output_cols_prefix]
|
871
891
|
|
872
892
|
classes = self._sklearn_object.classes_
|
873
893
|
if isinstance(classes, numpy.ndarray):
|
@@ -1092,7 +1112,7 @@ class SpectralEmbedding(BaseTransformer):
|
|
1092
1112
|
cp.dump(self._sklearn_object, local_score_file)
|
1093
1113
|
|
1094
1114
|
# Create temp stage to run score.
|
1095
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1115
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1096
1116
|
session = dataset._session
|
1097
1117
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1098
1118
|
SqlResultValidator(
|
@@ -1106,8 +1126,9 @@ class SpectralEmbedding(BaseTransformer):
|
|
1106
1126
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1107
1127
|
).validate()
|
1108
1128
|
|
1109
|
-
|
1110
|
-
|
1129
|
+
# Use posixpath to construct stage paths
|
1130
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1131
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1111
1132
|
statement_params = telemetry.get_function_usage_statement_params(
|
1112
1133
|
project=_PROJECT,
|
1113
1134
|
subproject=_SUBPROJECT,
|
@@ -1133,6 +1154,7 @@ class SpectralEmbedding(BaseTransformer):
|
|
1133
1154
|
replace=True,
|
1134
1155
|
session=session,
|
1135
1156
|
statement_params=statement_params,
|
1157
|
+
anonymous=True
|
1136
1158
|
)
|
1137
1159
|
def score_wrapper_sproc(
|
1138
1160
|
session: Session,
|
@@ -1140,7 +1162,8 @@ class SpectralEmbedding(BaseTransformer):
|
|
1140
1162
|
stage_score_file_name: str,
|
1141
1163
|
input_cols: List[str],
|
1142
1164
|
label_cols: List[str],
|
1143
|
-
sample_weight_col: Optional[str]
|
1165
|
+
sample_weight_col: Optional[str],
|
1166
|
+
statement_params: Dict[str, str]
|
1144
1167
|
) -> float:
|
1145
1168
|
import cloudpickle as cp
|
1146
1169
|
import numpy as np
|
@@ -1190,14 +1213,14 @@ class SpectralEmbedding(BaseTransformer):
|
|
1190
1213
|
api_calls=[Session.call],
|
1191
1214
|
custom_tags=dict([("autogen", True)]),
|
1192
1215
|
)
|
1193
|
-
score =
|
1194
|
-
|
1216
|
+
score = score_wrapper_sproc(
|
1217
|
+
session,
|
1195
1218
|
query,
|
1196
1219
|
stage_score_file_name,
|
1197
1220
|
identifier.get_unescaped_names(self.input_cols),
|
1198
1221
|
identifier.get_unescaped_names(self.label_cols),
|
1199
1222
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1200
|
-
statement_params
|
1223
|
+
statement_params,
|
1201
1224
|
)
|
1202
1225
|
|
1203
1226
|
cleanup_temp_files([local_score_file_name])
|
@@ -1215,18 +1238,20 @@ class SpectralEmbedding(BaseTransformer):
|
|
1215
1238
|
if self._sklearn_object._estimator_type == 'classifier':
|
1216
1239
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1217
1240
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1218
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1241
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1242
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1219
1243
|
# For regressor, the type of predict is float64
|
1220
1244
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1221
1245
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1222
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1223
|
-
|
1246
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1247
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1224
1248
|
for prob_func in PROB_FUNCTIONS:
|
1225
1249
|
if hasattr(self, prob_func):
|
1226
1250
|
output_cols_prefix: str = f"{prob_func}_"
|
1227
1251
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1228
1252
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1229
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1253
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1254
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1230
1255
|
|
1231
1256
|
@property
|
1232
1257
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -302,7 +304,6 @@ class TSNE(BaseTransformer):
|
|
302
304
|
sample_weight_col: Optional[str] = None,
|
303
305
|
) -> None:
|
304
306
|
super().__init__()
|
305
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
306
307
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
307
308
|
|
308
309
|
self._deps = list(deps)
|
@@ -337,6 +338,15 @@ class TSNE(BaseTransformer):
|
|
337
338
|
self.set_drop_input_cols(drop_input_cols)
|
338
339
|
self.set_sample_weight_col(sample_weight_col)
|
339
340
|
|
341
|
+
def _get_rand_id(self) -> str:
|
342
|
+
"""
|
343
|
+
Generate random id to be used in sproc and stage names.
|
344
|
+
|
345
|
+
Returns:
|
346
|
+
Random id string usable in sproc, table, and stage names.
|
347
|
+
"""
|
348
|
+
return str(uuid4()).replace("-", "_").upper()
|
349
|
+
|
340
350
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
341
351
|
"""
|
342
352
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -415,7 +425,7 @@ class TSNE(BaseTransformer):
|
|
415
425
|
cp.dump(self._sklearn_object, local_transform_file)
|
416
426
|
|
417
427
|
# Create temp stage to run fit.
|
418
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
428
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
419
429
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
420
430
|
SqlResultValidator(
|
421
431
|
session=session,
|
@@ -428,11 +438,12 @@ class TSNE(BaseTransformer):
|
|
428
438
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
429
439
|
).validate()
|
430
440
|
|
431
|
-
|
441
|
+
# Use posixpath to construct stage paths
|
442
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
443
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
432
444
|
local_result_file_name = get_temp_file_path()
|
433
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
434
445
|
|
435
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
446
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
436
447
|
statement_params = telemetry.get_function_usage_statement_params(
|
437
448
|
project=_PROJECT,
|
438
449
|
subproject=_SUBPROJECT,
|
@@ -458,6 +469,7 @@ class TSNE(BaseTransformer):
|
|
458
469
|
replace=True,
|
459
470
|
session=session,
|
460
471
|
statement_params=statement_params,
|
472
|
+
anonymous=True
|
461
473
|
)
|
462
474
|
def fit_wrapper_sproc(
|
463
475
|
session: Session,
|
@@ -466,7 +478,8 @@ class TSNE(BaseTransformer):
|
|
466
478
|
stage_result_file_name: str,
|
467
479
|
input_cols: List[str],
|
468
480
|
label_cols: List[str],
|
469
|
-
sample_weight_col: Optional[str]
|
481
|
+
sample_weight_col: Optional[str],
|
482
|
+
statement_params: Dict[str, str]
|
470
483
|
) -> str:
|
471
484
|
import cloudpickle as cp
|
472
485
|
import numpy as np
|
@@ -533,15 +546,15 @@ class TSNE(BaseTransformer):
|
|
533
546
|
api_calls=[Session.call],
|
534
547
|
custom_tags=dict([("autogen", True)]),
|
535
548
|
)
|
536
|
-
sproc_export_file_name =
|
537
|
-
|
549
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
550
|
+
session,
|
538
551
|
query,
|
539
552
|
stage_transform_file_name,
|
540
553
|
stage_result_file_name,
|
541
554
|
identifier.get_unescaped_names(self.input_cols),
|
542
555
|
identifier.get_unescaped_names(self.label_cols),
|
543
556
|
identifier.get_unescaped_names(self.sample_weight_col),
|
544
|
-
statement_params
|
557
|
+
statement_params,
|
545
558
|
)
|
546
559
|
|
547
560
|
if "|" in sproc_export_file_name:
|
@@ -551,7 +564,7 @@ class TSNE(BaseTransformer):
|
|
551
564
|
print("\n".join(fields[1:]))
|
552
565
|
|
553
566
|
session.file.get(
|
554
|
-
|
567
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
555
568
|
local_result_file_name,
|
556
569
|
statement_params=statement_params
|
557
570
|
)
|
@@ -597,7 +610,7 @@ class TSNE(BaseTransformer):
|
|
597
610
|
|
598
611
|
# Register vectorized UDF for batch inference
|
599
612
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
600
|
-
safe_id=self.
|
613
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
601
614
|
|
602
615
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
603
616
|
# will try to pickle all of self which fails.
|
@@ -689,7 +702,7 @@ class TSNE(BaseTransformer):
|
|
689
702
|
return transformed_pandas_df.to_dict("records")
|
690
703
|
|
691
704
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
692
|
-
safe_id=self.
|
705
|
+
safe_id=self._get_rand_id()
|
693
706
|
)
|
694
707
|
|
695
708
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -854,11 +867,18 @@ class TSNE(BaseTransformer):
|
|
854
867
|
Transformed dataset.
|
855
868
|
"""
|
856
869
|
if isinstance(dataset, DataFrame):
|
870
|
+
expected_type_inferred = ""
|
871
|
+
# when it is classifier, infer the datatype from label columns
|
872
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
873
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
874
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
875
|
+
)
|
876
|
+
|
857
877
|
output_df = self._batch_inference(
|
858
878
|
dataset=dataset,
|
859
879
|
inference_method="predict",
|
860
880
|
expected_output_cols_list=self.output_cols,
|
861
|
-
expected_output_cols_type=
|
881
|
+
expected_output_cols_type=expected_type_inferred,
|
862
882
|
)
|
863
883
|
elif isinstance(dataset, pd.DataFrame):
|
864
884
|
output_df = self._sklearn_inference(
|
@@ -929,10 +949,10 @@ class TSNE(BaseTransformer):
|
|
929
949
|
|
930
950
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
931
951
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
932
|
-
Returns
|
952
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
933
953
|
"""
|
934
954
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
935
|
-
return []
|
955
|
+
return [output_cols_prefix]
|
936
956
|
|
937
957
|
classes = self._sklearn_object.classes_
|
938
958
|
if isinstance(classes, numpy.ndarray):
|
@@ -1157,7 +1177,7 @@ class TSNE(BaseTransformer):
|
|
1157
1177
|
cp.dump(self._sklearn_object, local_score_file)
|
1158
1178
|
|
1159
1179
|
# Create temp stage to run score.
|
1160
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1180
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1161
1181
|
session = dataset._session
|
1162
1182
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1163
1183
|
SqlResultValidator(
|
@@ -1171,8 +1191,9 @@ class TSNE(BaseTransformer):
|
|
1171
1191
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1172
1192
|
).validate()
|
1173
1193
|
|
1174
|
-
|
1175
|
-
|
1194
|
+
# Use posixpath to construct stage paths
|
1195
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1196
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1176
1197
|
statement_params = telemetry.get_function_usage_statement_params(
|
1177
1198
|
project=_PROJECT,
|
1178
1199
|
subproject=_SUBPROJECT,
|
@@ -1198,6 +1219,7 @@ class TSNE(BaseTransformer):
|
|
1198
1219
|
replace=True,
|
1199
1220
|
session=session,
|
1200
1221
|
statement_params=statement_params,
|
1222
|
+
anonymous=True
|
1201
1223
|
)
|
1202
1224
|
def score_wrapper_sproc(
|
1203
1225
|
session: Session,
|
@@ -1205,7 +1227,8 @@ class TSNE(BaseTransformer):
|
|
1205
1227
|
stage_score_file_name: str,
|
1206
1228
|
input_cols: List[str],
|
1207
1229
|
label_cols: List[str],
|
1208
|
-
sample_weight_col: Optional[str]
|
1230
|
+
sample_weight_col: Optional[str],
|
1231
|
+
statement_params: Dict[str, str]
|
1209
1232
|
) -> float:
|
1210
1233
|
import cloudpickle as cp
|
1211
1234
|
import numpy as np
|
@@ -1255,14 +1278,14 @@ class TSNE(BaseTransformer):
|
|
1255
1278
|
api_calls=[Session.call],
|
1256
1279
|
custom_tags=dict([("autogen", True)]),
|
1257
1280
|
)
|
1258
|
-
score =
|
1259
|
-
|
1281
|
+
score = score_wrapper_sproc(
|
1282
|
+
session,
|
1260
1283
|
query,
|
1261
1284
|
stage_score_file_name,
|
1262
1285
|
identifier.get_unescaped_names(self.input_cols),
|
1263
1286
|
identifier.get_unescaped_names(self.label_cols),
|
1264
1287
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1265
|
-
statement_params
|
1288
|
+
statement_params,
|
1266
1289
|
)
|
1267
1290
|
|
1268
1291
|
cleanup_temp_files([local_score_file_name])
|
@@ -1280,18 +1303,20 @@ class TSNE(BaseTransformer):
|
|
1280
1303
|
if self._sklearn_object._estimator_type == 'classifier':
|
1281
1304
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1282
1305
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1283
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1306
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1307
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1284
1308
|
# For regressor, the type of predict is float64
|
1285
1309
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1286
1310
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1287
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1288
|
-
|
1311
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1312
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1289
1313
|
for prob_func in PROB_FUNCTIONS:
|
1290
1314
|
if hasattr(self, prob_func):
|
1291
1315
|
output_cols_prefix: str = f"{prob_func}_"
|
1292
1316
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1293
1317
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1294
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1318
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1319
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1295
1320
|
|
1296
1321
|
@property
|
1297
1322
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|