snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -291,7 +293,6 @@ class KernelPCA(BaseTransformer):
|
|
291
293
|
sample_weight_col: Optional[str] = None,
|
292
294
|
) -> None:
|
293
295
|
super().__init__()
|
294
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
295
296
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
296
297
|
|
297
298
|
self._deps = list(deps)
|
@@ -326,6 +327,15 @@ class KernelPCA(BaseTransformer):
|
|
326
327
|
self.set_drop_input_cols(drop_input_cols)
|
327
328
|
self.set_sample_weight_col(sample_weight_col)
|
328
329
|
|
330
|
+
def _get_rand_id(self) -> str:
|
331
|
+
"""
|
332
|
+
Generate random id to be used in sproc and stage names.
|
333
|
+
|
334
|
+
Returns:
|
335
|
+
Random id string usable in sproc, table, and stage names.
|
336
|
+
"""
|
337
|
+
return str(uuid4()).replace("-", "_").upper()
|
338
|
+
|
329
339
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
330
340
|
"""
|
331
341
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -404,7 +414,7 @@ class KernelPCA(BaseTransformer):
|
|
404
414
|
cp.dump(self._sklearn_object, local_transform_file)
|
405
415
|
|
406
416
|
# Create temp stage to run fit.
|
407
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
417
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
408
418
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
409
419
|
SqlResultValidator(
|
410
420
|
session=session,
|
@@ -417,11 +427,12 @@ class KernelPCA(BaseTransformer):
|
|
417
427
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
418
428
|
).validate()
|
419
429
|
|
420
|
-
|
430
|
+
# Use posixpath to construct stage paths
|
431
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
432
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
421
433
|
local_result_file_name = get_temp_file_path()
|
422
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
423
434
|
|
424
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
435
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
425
436
|
statement_params = telemetry.get_function_usage_statement_params(
|
426
437
|
project=_PROJECT,
|
427
438
|
subproject=_SUBPROJECT,
|
@@ -447,6 +458,7 @@ class KernelPCA(BaseTransformer):
|
|
447
458
|
replace=True,
|
448
459
|
session=session,
|
449
460
|
statement_params=statement_params,
|
461
|
+
anonymous=True
|
450
462
|
)
|
451
463
|
def fit_wrapper_sproc(
|
452
464
|
session: Session,
|
@@ -455,7 +467,8 @@ class KernelPCA(BaseTransformer):
|
|
455
467
|
stage_result_file_name: str,
|
456
468
|
input_cols: List[str],
|
457
469
|
label_cols: List[str],
|
458
|
-
sample_weight_col: Optional[str]
|
470
|
+
sample_weight_col: Optional[str],
|
471
|
+
statement_params: Dict[str, str]
|
459
472
|
) -> str:
|
460
473
|
import cloudpickle as cp
|
461
474
|
import numpy as np
|
@@ -522,15 +535,15 @@ class KernelPCA(BaseTransformer):
|
|
522
535
|
api_calls=[Session.call],
|
523
536
|
custom_tags=dict([("autogen", True)]),
|
524
537
|
)
|
525
|
-
sproc_export_file_name =
|
526
|
-
|
538
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
539
|
+
session,
|
527
540
|
query,
|
528
541
|
stage_transform_file_name,
|
529
542
|
stage_result_file_name,
|
530
543
|
identifier.get_unescaped_names(self.input_cols),
|
531
544
|
identifier.get_unescaped_names(self.label_cols),
|
532
545
|
identifier.get_unescaped_names(self.sample_weight_col),
|
533
|
-
statement_params
|
546
|
+
statement_params,
|
534
547
|
)
|
535
548
|
|
536
549
|
if "|" in sproc_export_file_name:
|
@@ -540,7 +553,7 @@ class KernelPCA(BaseTransformer):
|
|
540
553
|
print("\n".join(fields[1:]))
|
541
554
|
|
542
555
|
session.file.get(
|
543
|
-
|
556
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
544
557
|
local_result_file_name,
|
545
558
|
statement_params=statement_params
|
546
559
|
)
|
@@ -586,7 +599,7 @@ class KernelPCA(BaseTransformer):
|
|
586
599
|
|
587
600
|
# Register vectorized UDF for batch inference
|
588
601
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
589
|
-
safe_id=self.
|
602
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
590
603
|
|
591
604
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
592
605
|
# will try to pickle all of self which fails.
|
@@ -678,7 +691,7 @@ class KernelPCA(BaseTransformer):
|
|
678
691
|
return transformed_pandas_df.to_dict("records")
|
679
692
|
|
680
693
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
681
|
-
safe_id=self.
|
694
|
+
safe_id=self._get_rand_id()
|
682
695
|
)
|
683
696
|
|
684
697
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -843,11 +856,18 @@ class KernelPCA(BaseTransformer):
|
|
843
856
|
Transformed dataset.
|
844
857
|
"""
|
845
858
|
if isinstance(dataset, DataFrame):
|
859
|
+
expected_type_inferred = ""
|
860
|
+
# when it is classifier, infer the datatype from label columns
|
861
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
862
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
863
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
864
|
+
)
|
865
|
+
|
846
866
|
output_df = self._batch_inference(
|
847
867
|
dataset=dataset,
|
848
868
|
inference_method="predict",
|
849
869
|
expected_output_cols_list=self.output_cols,
|
850
|
-
expected_output_cols_type=
|
870
|
+
expected_output_cols_type=expected_type_inferred,
|
851
871
|
)
|
852
872
|
elif isinstance(dataset, pd.DataFrame):
|
853
873
|
output_df = self._sklearn_inference(
|
@@ -920,10 +940,10 @@ class KernelPCA(BaseTransformer):
|
|
920
940
|
|
921
941
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
922
942
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
923
|
-
Returns
|
943
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
924
944
|
"""
|
925
945
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
926
|
-
return []
|
946
|
+
return [output_cols_prefix]
|
927
947
|
|
928
948
|
classes = self._sklearn_object.classes_
|
929
949
|
if isinstance(classes, numpy.ndarray):
|
@@ -1148,7 +1168,7 @@ class KernelPCA(BaseTransformer):
|
|
1148
1168
|
cp.dump(self._sklearn_object, local_score_file)
|
1149
1169
|
|
1150
1170
|
# Create temp stage to run score.
|
1151
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1171
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1152
1172
|
session = dataset._session
|
1153
1173
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1154
1174
|
SqlResultValidator(
|
@@ -1162,8 +1182,9 @@ class KernelPCA(BaseTransformer):
|
|
1162
1182
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1163
1183
|
).validate()
|
1164
1184
|
|
1165
|
-
|
1166
|
-
|
1185
|
+
# Use posixpath to construct stage paths
|
1186
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1187
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1167
1188
|
statement_params = telemetry.get_function_usage_statement_params(
|
1168
1189
|
project=_PROJECT,
|
1169
1190
|
subproject=_SUBPROJECT,
|
@@ -1189,6 +1210,7 @@ class KernelPCA(BaseTransformer):
|
|
1189
1210
|
replace=True,
|
1190
1211
|
session=session,
|
1191
1212
|
statement_params=statement_params,
|
1213
|
+
anonymous=True
|
1192
1214
|
)
|
1193
1215
|
def score_wrapper_sproc(
|
1194
1216
|
session: Session,
|
@@ -1196,7 +1218,8 @@ class KernelPCA(BaseTransformer):
|
|
1196
1218
|
stage_score_file_name: str,
|
1197
1219
|
input_cols: List[str],
|
1198
1220
|
label_cols: List[str],
|
1199
|
-
sample_weight_col: Optional[str]
|
1221
|
+
sample_weight_col: Optional[str],
|
1222
|
+
statement_params: Dict[str, str]
|
1200
1223
|
) -> float:
|
1201
1224
|
import cloudpickle as cp
|
1202
1225
|
import numpy as np
|
@@ -1246,14 +1269,14 @@ class KernelPCA(BaseTransformer):
|
|
1246
1269
|
api_calls=[Session.call],
|
1247
1270
|
custom_tags=dict([("autogen", True)]),
|
1248
1271
|
)
|
1249
|
-
score =
|
1250
|
-
|
1272
|
+
score = score_wrapper_sproc(
|
1273
|
+
session,
|
1251
1274
|
query,
|
1252
1275
|
stage_score_file_name,
|
1253
1276
|
identifier.get_unescaped_names(self.input_cols),
|
1254
1277
|
identifier.get_unescaped_names(self.label_cols),
|
1255
1278
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1256
|
-
statement_params
|
1279
|
+
statement_params,
|
1257
1280
|
)
|
1258
1281
|
|
1259
1282
|
cleanup_temp_files([local_score_file_name])
|
@@ -1271,18 +1294,20 @@ class KernelPCA(BaseTransformer):
|
|
1271
1294
|
if self._sklearn_object._estimator_type == 'classifier':
|
1272
1295
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1273
1296
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1274
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1297
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1298
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1275
1299
|
# For regressor, the type of predict is float64
|
1276
1300
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1277
1301
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1278
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1279
|
-
|
1302
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1303
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1280
1304
|
for prob_func in PROB_FUNCTIONS:
|
1281
1305
|
if hasattr(self, prob_func):
|
1282
1306
|
output_cols_prefix: str = f"{prob_func}_"
|
1283
1307
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1284
1308
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1285
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1309
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1310
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1286
1311
|
|
1287
1312
|
@property
|
1288
1313
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -308,7 +310,6 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
308
310
|
sample_weight_col: Optional[str] = None,
|
309
311
|
) -> None:
|
310
312
|
super().__init__()
|
311
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
312
313
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
313
314
|
|
314
315
|
self._deps = list(deps)
|
@@ -348,6 +349,15 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
348
349
|
self.set_drop_input_cols(drop_input_cols)
|
349
350
|
self.set_sample_weight_col(sample_weight_col)
|
350
351
|
|
352
|
+
def _get_rand_id(self) -> str:
|
353
|
+
"""
|
354
|
+
Generate random id to be used in sproc and stage names.
|
355
|
+
|
356
|
+
Returns:
|
357
|
+
Random id string usable in sproc, table, and stage names.
|
358
|
+
"""
|
359
|
+
return str(uuid4()).replace("-", "_").upper()
|
360
|
+
|
351
361
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
352
362
|
"""
|
353
363
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -426,7 +436,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
426
436
|
cp.dump(self._sklearn_object, local_transform_file)
|
427
437
|
|
428
438
|
# Create temp stage to run fit.
|
429
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
439
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
430
440
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
431
441
|
SqlResultValidator(
|
432
442
|
session=session,
|
@@ -439,11 +449,12 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
439
449
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
440
450
|
).validate()
|
441
451
|
|
442
|
-
|
452
|
+
# Use posixpath to construct stage paths
|
453
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
454
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
443
455
|
local_result_file_name = get_temp_file_path()
|
444
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
445
456
|
|
446
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
457
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
447
458
|
statement_params = telemetry.get_function_usage_statement_params(
|
448
459
|
project=_PROJECT,
|
449
460
|
subproject=_SUBPROJECT,
|
@@ -469,6 +480,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
469
480
|
replace=True,
|
470
481
|
session=session,
|
471
482
|
statement_params=statement_params,
|
483
|
+
anonymous=True
|
472
484
|
)
|
473
485
|
def fit_wrapper_sproc(
|
474
486
|
session: Session,
|
@@ -477,7 +489,8 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
477
489
|
stage_result_file_name: str,
|
478
490
|
input_cols: List[str],
|
479
491
|
label_cols: List[str],
|
480
|
-
sample_weight_col: Optional[str]
|
492
|
+
sample_weight_col: Optional[str],
|
493
|
+
statement_params: Dict[str, str]
|
481
494
|
) -> str:
|
482
495
|
import cloudpickle as cp
|
483
496
|
import numpy as np
|
@@ -544,15 +557,15 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
544
557
|
api_calls=[Session.call],
|
545
558
|
custom_tags=dict([("autogen", True)]),
|
546
559
|
)
|
547
|
-
sproc_export_file_name =
|
548
|
-
|
560
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
561
|
+
session,
|
549
562
|
query,
|
550
563
|
stage_transform_file_name,
|
551
564
|
stage_result_file_name,
|
552
565
|
identifier.get_unescaped_names(self.input_cols),
|
553
566
|
identifier.get_unescaped_names(self.label_cols),
|
554
567
|
identifier.get_unescaped_names(self.sample_weight_col),
|
555
|
-
statement_params
|
568
|
+
statement_params,
|
556
569
|
)
|
557
570
|
|
558
571
|
if "|" in sproc_export_file_name:
|
@@ -562,7 +575,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
562
575
|
print("\n".join(fields[1:]))
|
563
576
|
|
564
577
|
session.file.get(
|
565
|
-
|
578
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
566
579
|
local_result_file_name,
|
567
580
|
statement_params=statement_params
|
568
581
|
)
|
@@ -608,7 +621,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
608
621
|
|
609
622
|
# Register vectorized UDF for batch inference
|
610
623
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
611
|
-
safe_id=self.
|
624
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
612
625
|
|
613
626
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
614
627
|
# will try to pickle all of self which fails.
|
@@ -700,7 +713,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
700
713
|
return transformed_pandas_df.to_dict("records")
|
701
714
|
|
702
715
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
703
|
-
safe_id=self.
|
716
|
+
safe_id=self._get_rand_id()
|
704
717
|
)
|
705
718
|
|
706
719
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -865,11 +878,18 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
865
878
|
Transformed dataset.
|
866
879
|
"""
|
867
880
|
if isinstance(dataset, DataFrame):
|
881
|
+
expected_type_inferred = ""
|
882
|
+
# when it is classifier, infer the datatype from label columns
|
883
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
884
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
885
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
886
|
+
)
|
887
|
+
|
868
888
|
output_df = self._batch_inference(
|
869
889
|
dataset=dataset,
|
870
890
|
inference_method="predict",
|
871
891
|
expected_output_cols_list=self.output_cols,
|
872
|
-
expected_output_cols_type=
|
892
|
+
expected_output_cols_type=expected_type_inferred,
|
873
893
|
)
|
874
894
|
elif isinstance(dataset, pd.DataFrame):
|
875
895
|
output_df = self._sklearn_inference(
|
@@ -942,10 +962,10 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
942
962
|
|
943
963
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
944
964
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
945
|
-
Returns
|
965
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
946
966
|
"""
|
947
967
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
948
|
-
return []
|
968
|
+
return [output_cols_prefix]
|
949
969
|
|
950
970
|
classes = self._sklearn_object.classes_
|
951
971
|
if isinstance(classes, numpy.ndarray):
|
@@ -1170,7 +1190,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
1170
1190
|
cp.dump(self._sklearn_object, local_score_file)
|
1171
1191
|
|
1172
1192
|
# Create temp stage to run score.
|
1173
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1193
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1174
1194
|
session = dataset._session
|
1175
1195
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1176
1196
|
SqlResultValidator(
|
@@ -1184,8 +1204,9 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
1184
1204
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1185
1205
|
).validate()
|
1186
1206
|
|
1187
|
-
|
1188
|
-
|
1207
|
+
# Use posixpath to construct stage paths
|
1208
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1209
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1189
1210
|
statement_params = telemetry.get_function_usage_statement_params(
|
1190
1211
|
project=_PROJECT,
|
1191
1212
|
subproject=_SUBPROJECT,
|
@@ -1211,6 +1232,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
1211
1232
|
replace=True,
|
1212
1233
|
session=session,
|
1213
1234
|
statement_params=statement_params,
|
1235
|
+
anonymous=True
|
1214
1236
|
)
|
1215
1237
|
def score_wrapper_sproc(
|
1216
1238
|
session: Session,
|
@@ -1218,7 +1240,8 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
1218
1240
|
stage_score_file_name: str,
|
1219
1241
|
input_cols: List[str],
|
1220
1242
|
label_cols: List[str],
|
1221
|
-
sample_weight_col: Optional[str]
|
1243
|
+
sample_weight_col: Optional[str],
|
1244
|
+
statement_params: Dict[str, str]
|
1222
1245
|
) -> float:
|
1223
1246
|
import cloudpickle as cp
|
1224
1247
|
import numpy as np
|
@@ -1268,14 +1291,14 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
1268
1291
|
api_calls=[Session.call],
|
1269
1292
|
custom_tags=dict([("autogen", True)]),
|
1270
1293
|
)
|
1271
|
-
score =
|
1272
|
-
|
1294
|
+
score = score_wrapper_sproc(
|
1295
|
+
session,
|
1273
1296
|
query,
|
1274
1297
|
stage_score_file_name,
|
1275
1298
|
identifier.get_unescaped_names(self.input_cols),
|
1276
1299
|
identifier.get_unescaped_names(self.label_cols),
|
1277
1300
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1278
|
-
statement_params
|
1301
|
+
statement_params,
|
1279
1302
|
)
|
1280
1303
|
|
1281
1304
|
cleanup_temp_files([local_score_file_name])
|
@@ -1293,18 +1316,20 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
1293
1316
|
if self._sklearn_object._estimator_type == 'classifier':
|
1294
1317
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1295
1318
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1296
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1319
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1320
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1297
1321
|
# For regressor, the type of predict is float64
|
1298
1322
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1299
1323
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1300
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1301
|
-
|
1324
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1325
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1302
1326
|
for prob_func in PROB_FUNCTIONS:
|
1303
1327
|
if hasattr(self, prob_func):
|
1304
1328
|
output_cols_prefix: str = f"{prob_func}_"
|
1305
1329
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1306
1330
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1307
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1331
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1332
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1308
1333
|
|
1309
1334
|
@property
|
1310
1335
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|