snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -238,7 +240,6 @@ class SparsePCA(BaseTransformer):
|
|
238
240
|
sample_weight_col: Optional[str] = None,
|
239
241
|
) -> None:
|
240
242
|
super().__init__()
|
241
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
242
243
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
243
244
|
|
244
245
|
self._deps = list(deps)
|
@@ -268,6 +269,15 @@ class SparsePCA(BaseTransformer):
|
|
268
269
|
self.set_drop_input_cols(drop_input_cols)
|
269
270
|
self.set_sample_weight_col(sample_weight_col)
|
270
271
|
|
272
|
+
def _get_rand_id(self) -> str:
|
273
|
+
"""
|
274
|
+
Generate random id to be used in sproc and stage names.
|
275
|
+
|
276
|
+
Returns:
|
277
|
+
Random id string usable in sproc, table, and stage names.
|
278
|
+
"""
|
279
|
+
return str(uuid4()).replace("-", "_").upper()
|
280
|
+
|
271
281
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
272
282
|
"""
|
273
283
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -346,7 +356,7 @@ class SparsePCA(BaseTransformer):
|
|
346
356
|
cp.dump(self._sklearn_object, local_transform_file)
|
347
357
|
|
348
358
|
# Create temp stage to run fit.
|
349
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
359
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
350
360
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
351
361
|
SqlResultValidator(
|
352
362
|
session=session,
|
@@ -359,11 +369,12 @@ class SparsePCA(BaseTransformer):
|
|
359
369
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
360
370
|
).validate()
|
361
371
|
|
362
|
-
|
372
|
+
# Use posixpath to construct stage paths
|
373
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
374
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
363
375
|
local_result_file_name = get_temp_file_path()
|
364
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
365
376
|
|
366
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
377
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
367
378
|
statement_params = telemetry.get_function_usage_statement_params(
|
368
379
|
project=_PROJECT,
|
369
380
|
subproject=_SUBPROJECT,
|
@@ -389,6 +400,7 @@ class SparsePCA(BaseTransformer):
|
|
389
400
|
replace=True,
|
390
401
|
session=session,
|
391
402
|
statement_params=statement_params,
|
403
|
+
anonymous=True
|
392
404
|
)
|
393
405
|
def fit_wrapper_sproc(
|
394
406
|
session: Session,
|
@@ -397,7 +409,8 @@ class SparsePCA(BaseTransformer):
|
|
397
409
|
stage_result_file_name: str,
|
398
410
|
input_cols: List[str],
|
399
411
|
label_cols: List[str],
|
400
|
-
sample_weight_col: Optional[str]
|
412
|
+
sample_weight_col: Optional[str],
|
413
|
+
statement_params: Dict[str, str]
|
401
414
|
) -> str:
|
402
415
|
import cloudpickle as cp
|
403
416
|
import numpy as np
|
@@ -464,15 +477,15 @@ class SparsePCA(BaseTransformer):
|
|
464
477
|
api_calls=[Session.call],
|
465
478
|
custom_tags=dict([("autogen", True)]),
|
466
479
|
)
|
467
|
-
sproc_export_file_name =
|
468
|
-
|
480
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
481
|
+
session,
|
469
482
|
query,
|
470
483
|
stage_transform_file_name,
|
471
484
|
stage_result_file_name,
|
472
485
|
identifier.get_unescaped_names(self.input_cols),
|
473
486
|
identifier.get_unescaped_names(self.label_cols),
|
474
487
|
identifier.get_unescaped_names(self.sample_weight_col),
|
475
|
-
statement_params
|
488
|
+
statement_params,
|
476
489
|
)
|
477
490
|
|
478
491
|
if "|" in sproc_export_file_name:
|
@@ -482,7 +495,7 @@ class SparsePCA(BaseTransformer):
|
|
482
495
|
print("\n".join(fields[1:]))
|
483
496
|
|
484
497
|
session.file.get(
|
485
|
-
|
498
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
486
499
|
local_result_file_name,
|
487
500
|
statement_params=statement_params
|
488
501
|
)
|
@@ -528,7 +541,7 @@ class SparsePCA(BaseTransformer):
|
|
528
541
|
|
529
542
|
# Register vectorized UDF for batch inference
|
530
543
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
531
|
-
safe_id=self.
|
544
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
532
545
|
|
533
546
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
534
547
|
# will try to pickle all of self which fails.
|
@@ -620,7 +633,7 @@ class SparsePCA(BaseTransformer):
|
|
620
633
|
return transformed_pandas_df.to_dict("records")
|
621
634
|
|
622
635
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
623
|
-
safe_id=self.
|
636
|
+
safe_id=self._get_rand_id()
|
624
637
|
)
|
625
638
|
|
626
639
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -785,11 +798,18 @@ class SparsePCA(BaseTransformer):
|
|
785
798
|
Transformed dataset.
|
786
799
|
"""
|
787
800
|
if isinstance(dataset, DataFrame):
|
801
|
+
expected_type_inferred = ""
|
802
|
+
# when it is classifier, infer the datatype from label columns
|
803
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
804
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
805
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
806
|
+
)
|
807
|
+
|
788
808
|
output_df = self._batch_inference(
|
789
809
|
dataset=dataset,
|
790
810
|
inference_method="predict",
|
791
811
|
expected_output_cols_list=self.output_cols,
|
792
|
-
expected_output_cols_type=
|
812
|
+
expected_output_cols_type=expected_type_inferred,
|
793
813
|
)
|
794
814
|
elif isinstance(dataset, pd.DataFrame):
|
795
815
|
output_df = self._sklearn_inference(
|
@@ -862,10 +882,10 @@ class SparsePCA(BaseTransformer):
|
|
862
882
|
|
863
883
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
864
884
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
865
|
-
Returns
|
885
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
866
886
|
"""
|
867
887
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
868
|
-
return []
|
888
|
+
return [output_cols_prefix]
|
869
889
|
|
870
890
|
classes = self._sklearn_object.classes_
|
871
891
|
if isinstance(classes, numpy.ndarray):
|
@@ -1090,7 +1110,7 @@ class SparsePCA(BaseTransformer):
|
|
1090
1110
|
cp.dump(self._sklearn_object, local_score_file)
|
1091
1111
|
|
1092
1112
|
# Create temp stage to run score.
|
1093
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1113
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1094
1114
|
session = dataset._session
|
1095
1115
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1096
1116
|
SqlResultValidator(
|
@@ -1104,8 +1124,9 @@ class SparsePCA(BaseTransformer):
|
|
1104
1124
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1105
1125
|
).validate()
|
1106
1126
|
|
1107
|
-
|
1108
|
-
|
1127
|
+
# Use posixpath to construct stage paths
|
1128
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1129
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1109
1130
|
statement_params = telemetry.get_function_usage_statement_params(
|
1110
1131
|
project=_PROJECT,
|
1111
1132
|
subproject=_SUBPROJECT,
|
@@ -1131,6 +1152,7 @@ class SparsePCA(BaseTransformer):
|
|
1131
1152
|
replace=True,
|
1132
1153
|
session=session,
|
1133
1154
|
statement_params=statement_params,
|
1155
|
+
anonymous=True
|
1134
1156
|
)
|
1135
1157
|
def score_wrapper_sproc(
|
1136
1158
|
session: Session,
|
@@ -1138,7 +1160,8 @@ class SparsePCA(BaseTransformer):
|
|
1138
1160
|
stage_score_file_name: str,
|
1139
1161
|
input_cols: List[str],
|
1140
1162
|
label_cols: List[str],
|
1141
|
-
sample_weight_col: Optional[str]
|
1163
|
+
sample_weight_col: Optional[str],
|
1164
|
+
statement_params: Dict[str, str]
|
1142
1165
|
) -> float:
|
1143
1166
|
import cloudpickle as cp
|
1144
1167
|
import numpy as np
|
@@ -1188,14 +1211,14 @@ class SparsePCA(BaseTransformer):
|
|
1188
1211
|
api_calls=[Session.call],
|
1189
1212
|
custom_tags=dict([("autogen", True)]),
|
1190
1213
|
)
|
1191
|
-
score =
|
1192
|
-
|
1214
|
+
score = score_wrapper_sproc(
|
1215
|
+
session,
|
1193
1216
|
query,
|
1194
1217
|
stage_score_file_name,
|
1195
1218
|
identifier.get_unescaped_names(self.input_cols),
|
1196
1219
|
identifier.get_unescaped_names(self.label_cols),
|
1197
1220
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1198
|
-
statement_params
|
1221
|
+
statement_params,
|
1199
1222
|
)
|
1200
1223
|
|
1201
1224
|
cleanup_temp_files([local_score_file_name])
|
@@ -1213,18 +1236,20 @@ class SparsePCA(BaseTransformer):
|
|
1213
1236
|
if self._sklearn_object._estimator_type == 'classifier':
|
1214
1237
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1215
1238
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1216
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1239
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1240
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1217
1241
|
# For regressor, the type of predict is float64
|
1218
1242
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1219
1243
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1220
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1221
|
-
|
1244
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1245
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1222
1246
|
for prob_func in PROB_FUNCTIONS:
|
1223
1247
|
if hasattr(self, prob_func):
|
1224
1248
|
output_cols_prefix: str = f"{prob_func}_"
|
1225
1249
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1226
1250
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1227
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1251
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1252
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1228
1253
|
|
1229
1254
|
@property
|
1230
1255
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -223,7 +225,6 @@ class TruncatedSVD(BaseTransformer):
|
|
223
225
|
sample_weight_col: Optional[str] = None,
|
224
226
|
) -> None:
|
225
227
|
super().__init__()
|
226
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
227
228
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
228
229
|
|
229
230
|
self._deps = list(deps)
|
@@ -249,6 +250,15 @@ class TruncatedSVD(BaseTransformer):
|
|
249
250
|
self.set_drop_input_cols(drop_input_cols)
|
250
251
|
self.set_sample_weight_col(sample_weight_col)
|
251
252
|
|
253
|
+
def _get_rand_id(self) -> str:
|
254
|
+
"""
|
255
|
+
Generate random id to be used in sproc and stage names.
|
256
|
+
|
257
|
+
Returns:
|
258
|
+
Random id string usable in sproc, table, and stage names.
|
259
|
+
"""
|
260
|
+
return str(uuid4()).replace("-", "_").upper()
|
261
|
+
|
252
262
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
253
263
|
"""
|
254
264
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -327,7 +337,7 @@ class TruncatedSVD(BaseTransformer):
|
|
327
337
|
cp.dump(self._sklearn_object, local_transform_file)
|
328
338
|
|
329
339
|
# Create temp stage to run fit.
|
330
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
340
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
331
341
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
332
342
|
SqlResultValidator(
|
333
343
|
session=session,
|
@@ -340,11 +350,12 @@ class TruncatedSVD(BaseTransformer):
|
|
340
350
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
341
351
|
).validate()
|
342
352
|
|
343
|
-
|
353
|
+
# Use posixpath to construct stage paths
|
354
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
355
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
344
356
|
local_result_file_name = get_temp_file_path()
|
345
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
346
357
|
|
347
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
358
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
348
359
|
statement_params = telemetry.get_function_usage_statement_params(
|
349
360
|
project=_PROJECT,
|
350
361
|
subproject=_SUBPROJECT,
|
@@ -370,6 +381,7 @@ class TruncatedSVD(BaseTransformer):
|
|
370
381
|
replace=True,
|
371
382
|
session=session,
|
372
383
|
statement_params=statement_params,
|
384
|
+
anonymous=True
|
373
385
|
)
|
374
386
|
def fit_wrapper_sproc(
|
375
387
|
session: Session,
|
@@ -378,7 +390,8 @@ class TruncatedSVD(BaseTransformer):
|
|
378
390
|
stage_result_file_name: str,
|
379
391
|
input_cols: List[str],
|
380
392
|
label_cols: List[str],
|
381
|
-
sample_weight_col: Optional[str]
|
393
|
+
sample_weight_col: Optional[str],
|
394
|
+
statement_params: Dict[str, str]
|
382
395
|
) -> str:
|
383
396
|
import cloudpickle as cp
|
384
397
|
import numpy as np
|
@@ -445,15 +458,15 @@ class TruncatedSVD(BaseTransformer):
|
|
445
458
|
api_calls=[Session.call],
|
446
459
|
custom_tags=dict([("autogen", True)]),
|
447
460
|
)
|
448
|
-
sproc_export_file_name =
|
449
|
-
|
461
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
462
|
+
session,
|
450
463
|
query,
|
451
464
|
stage_transform_file_name,
|
452
465
|
stage_result_file_name,
|
453
466
|
identifier.get_unescaped_names(self.input_cols),
|
454
467
|
identifier.get_unescaped_names(self.label_cols),
|
455
468
|
identifier.get_unescaped_names(self.sample_weight_col),
|
456
|
-
statement_params
|
469
|
+
statement_params,
|
457
470
|
)
|
458
471
|
|
459
472
|
if "|" in sproc_export_file_name:
|
@@ -463,7 +476,7 @@ class TruncatedSVD(BaseTransformer):
|
|
463
476
|
print("\n".join(fields[1:]))
|
464
477
|
|
465
478
|
session.file.get(
|
466
|
-
|
479
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
467
480
|
local_result_file_name,
|
468
481
|
statement_params=statement_params
|
469
482
|
)
|
@@ -509,7 +522,7 @@ class TruncatedSVD(BaseTransformer):
|
|
509
522
|
|
510
523
|
# Register vectorized UDF for batch inference
|
511
524
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
512
|
-
safe_id=self.
|
525
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
513
526
|
|
514
527
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
515
528
|
# will try to pickle all of self which fails.
|
@@ -601,7 +614,7 @@ class TruncatedSVD(BaseTransformer):
|
|
601
614
|
return transformed_pandas_df.to_dict("records")
|
602
615
|
|
603
616
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
604
|
-
safe_id=self.
|
617
|
+
safe_id=self._get_rand_id()
|
605
618
|
)
|
606
619
|
|
607
620
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -766,11 +779,18 @@ class TruncatedSVD(BaseTransformer):
|
|
766
779
|
Transformed dataset.
|
767
780
|
"""
|
768
781
|
if isinstance(dataset, DataFrame):
|
782
|
+
expected_type_inferred = ""
|
783
|
+
# when it is classifier, infer the datatype from label columns
|
784
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
785
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
786
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
787
|
+
)
|
788
|
+
|
769
789
|
output_df = self._batch_inference(
|
770
790
|
dataset=dataset,
|
771
791
|
inference_method="predict",
|
772
792
|
expected_output_cols_list=self.output_cols,
|
773
|
-
expected_output_cols_type=
|
793
|
+
expected_output_cols_type=expected_type_inferred,
|
774
794
|
)
|
775
795
|
elif isinstance(dataset, pd.DataFrame):
|
776
796
|
output_df = self._sklearn_inference(
|
@@ -843,10 +863,10 @@ class TruncatedSVD(BaseTransformer):
|
|
843
863
|
|
844
864
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
845
865
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
846
|
-
Returns
|
866
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
847
867
|
"""
|
848
868
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
849
|
-
return []
|
869
|
+
return [output_cols_prefix]
|
850
870
|
|
851
871
|
classes = self._sklearn_object.classes_
|
852
872
|
if isinstance(classes, numpy.ndarray):
|
@@ -1071,7 +1091,7 @@ class TruncatedSVD(BaseTransformer):
|
|
1071
1091
|
cp.dump(self._sklearn_object, local_score_file)
|
1072
1092
|
|
1073
1093
|
# Create temp stage to run score.
|
1074
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1094
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1075
1095
|
session = dataset._session
|
1076
1096
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1077
1097
|
SqlResultValidator(
|
@@ -1085,8 +1105,9 @@ class TruncatedSVD(BaseTransformer):
|
|
1085
1105
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1086
1106
|
).validate()
|
1087
1107
|
|
1088
|
-
|
1089
|
-
|
1108
|
+
# Use posixpath to construct stage paths
|
1109
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1110
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1090
1111
|
statement_params = telemetry.get_function_usage_statement_params(
|
1091
1112
|
project=_PROJECT,
|
1092
1113
|
subproject=_SUBPROJECT,
|
@@ -1112,6 +1133,7 @@ class TruncatedSVD(BaseTransformer):
|
|
1112
1133
|
replace=True,
|
1113
1134
|
session=session,
|
1114
1135
|
statement_params=statement_params,
|
1136
|
+
anonymous=True
|
1115
1137
|
)
|
1116
1138
|
def score_wrapper_sproc(
|
1117
1139
|
session: Session,
|
@@ -1119,7 +1141,8 @@ class TruncatedSVD(BaseTransformer):
|
|
1119
1141
|
stage_score_file_name: str,
|
1120
1142
|
input_cols: List[str],
|
1121
1143
|
label_cols: List[str],
|
1122
|
-
sample_weight_col: Optional[str]
|
1144
|
+
sample_weight_col: Optional[str],
|
1145
|
+
statement_params: Dict[str, str]
|
1123
1146
|
) -> float:
|
1124
1147
|
import cloudpickle as cp
|
1125
1148
|
import numpy as np
|
@@ -1169,14 +1192,14 @@ class TruncatedSVD(BaseTransformer):
|
|
1169
1192
|
api_calls=[Session.call],
|
1170
1193
|
custom_tags=dict([("autogen", True)]),
|
1171
1194
|
)
|
1172
|
-
score =
|
1173
|
-
|
1195
|
+
score = score_wrapper_sproc(
|
1196
|
+
session,
|
1174
1197
|
query,
|
1175
1198
|
stage_score_file_name,
|
1176
1199
|
identifier.get_unescaped_names(self.input_cols),
|
1177
1200
|
identifier.get_unescaped_names(self.label_cols),
|
1178
1201
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1179
|
-
statement_params
|
1202
|
+
statement_params,
|
1180
1203
|
)
|
1181
1204
|
|
1182
1205
|
cleanup_temp_files([local_score_file_name])
|
@@ -1194,18 +1217,20 @@ class TruncatedSVD(BaseTransformer):
|
|
1194
1217
|
if self._sklearn_object._estimator_type == 'classifier':
|
1195
1218
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1196
1219
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1197
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1220
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1221
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1198
1222
|
# For regressor, the type of predict is float64
|
1199
1223
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1200
1224
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1201
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1202
|
-
|
1225
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1226
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1203
1227
|
for prob_func in PROB_FUNCTIONS:
|
1204
1228
|
if hasattr(self, prob_func):
|
1205
1229
|
output_cols_prefix: str = f"{prob_func}_"
|
1206
1230
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1207
1231
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1208
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1232
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1233
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1209
1234
|
|
1210
1235
|
@property
|
1211
1236
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|