snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -256,7 +258,6 @@ class Isomap(BaseTransformer):
|
|
256
258
|
sample_weight_col: Optional[str] = None,
|
257
259
|
) -> None:
|
258
260
|
super().__init__()
|
259
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
260
261
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
261
262
|
|
262
263
|
self._deps = list(deps)
|
@@ -287,6 +288,15 @@ class Isomap(BaseTransformer):
|
|
287
288
|
self.set_drop_input_cols(drop_input_cols)
|
288
289
|
self.set_sample_weight_col(sample_weight_col)
|
289
290
|
|
291
|
+
def _get_rand_id(self) -> str:
|
292
|
+
"""
|
293
|
+
Generate random id to be used in sproc and stage names.
|
294
|
+
|
295
|
+
Returns:
|
296
|
+
Random id string usable in sproc, table, and stage names.
|
297
|
+
"""
|
298
|
+
return str(uuid4()).replace("-", "_").upper()
|
299
|
+
|
290
300
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
291
301
|
"""
|
292
302
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -365,7 +375,7 @@ class Isomap(BaseTransformer):
|
|
365
375
|
cp.dump(self._sklearn_object, local_transform_file)
|
366
376
|
|
367
377
|
# Create temp stage to run fit.
|
368
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
378
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
369
379
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
370
380
|
SqlResultValidator(
|
371
381
|
session=session,
|
@@ -378,11 +388,12 @@ class Isomap(BaseTransformer):
|
|
378
388
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
379
389
|
).validate()
|
380
390
|
|
381
|
-
|
391
|
+
# Use posixpath to construct stage paths
|
392
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
393
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
382
394
|
local_result_file_name = get_temp_file_path()
|
383
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
384
395
|
|
385
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
396
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
386
397
|
statement_params = telemetry.get_function_usage_statement_params(
|
387
398
|
project=_PROJECT,
|
388
399
|
subproject=_SUBPROJECT,
|
@@ -408,6 +419,7 @@ class Isomap(BaseTransformer):
|
|
408
419
|
replace=True,
|
409
420
|
session=session,
|
410
421
|
statement_params=statement_params,
|
422
|
+
anonymous=True
|
411
423
|
)
|
412
424
|
def fit_wrapper_sproc(
|
413
425
|
session: Session,
|
@@ -416,7 +428,8 @@ class Isomap(BaseTransformer):
|
|
416
428
|
stage_result_file_name: str,
|
417
429
|
input_cols: List[str],
|
418
430
|
label_cols: List[str],
|
419
|
-
sample_weight_col: Optional[str]
|
431
|
+
sample_weight_col: Optional[str],
|
432
|
+
statement_params: Dict[str, str]
|
420
433
|
) -> str:
|
421
434
|
import cloudpickle as cp
|
422
435
|
import numpy as np
|
@@ -483,15 +496,15 @@ class Isomap(BaseTransformer):
|
|
483
496
|
api_calls=[Session.call],
|
484
497
|
custom_tags=dict([("autogen", True)]),
|
485
498
|
)
|
486
|
-
sproc_export_file_name =
|
487
|
-
|
499
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
500
|
+
session,
|
488
501
|
query,
|
489
502
|
stage_transform_file_name,
|
490
503
|
stage_result_file_name,
|
491
504
|
identifier.get_unescaped_names(self.input_cols),
|
492
505
|
identifier.get_unescaped_names(self.label_cols),
|
493
506
|
identifier.get_unescaped_names(self.sample_weight_col),
|
494
|
-
statement_params
|
507
|
+
statement_params,
|
495
508
|
)
|
496
509
|
|
497
510
|
if "|" in sproc_export_file_name:
|
@@ -501,7 +514,7 @@ class Isomap(BaseTransformer):
|
|
501
514
|
print("\n".join(fields[1:]))
|
502
515
|
|
503
516
|
session.file.get(
|
504
|
-
|
517
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
505
518
|
local_result_file_name,
|
506
519
|
statement_params=statement_params
|
507
520
|
)
|
@@ -547,7 +560,7 @@ class Isomap(BaseTransformer):
|
|
547
560
|
|
548
561
|
# Register vectorized UDF for batch inference
|
549
562
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
550
|
-
safe_id=self.
|
563
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
551
564
|
|
552
565
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
553
566
|
# will try to pickle all of self which fails.
|
@@ -639,7 +652,7 @@ class Isomap(BaseTransformer):
|
|
639
652
|
return transformed_pandas_df.to_dict("records")
|
640
653
|
|
641
654
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
642
|
-
safe_id=self.
|
655
|
+
safe_id=self._get_rand_id()
|
643
656
|
)
|
644
657
|
|
645
658
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -804,11 +817,18 @@ class Isomap(BaseTransformer):
|
|
804
817
|
Transformed dataset.
|
805
818
|
"""
|
806
819
|
if isinstance(dataset, DataFrame):
|
820
|
+
expected_type_inferred = ""
|
821
|
+
# when it is classifier, infer the datatype from label columns
|
822
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
823
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
824
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
825
|
+
)
|
826
|
+
|
807
827
|
output_df = self._batch_inference(
|
808
828
|
dataset=dataset,
|
809
829
|
inference_method="predict",
|
810
830
|
expected_output_cols_list=self.output_cols,
|
811
|
-
expected_output_cols_type=
|
831
|
+
expected_output_cols_type=expected_type_inferred,
|
812
832
|
)
|
813
833
|
elif isinstance(dataset, pd.DataFrame):
|
814
834
|
output_df = self._sklearn_inference(
|
@@ -881,10 +901,10 @@ class Isomap(BaseTransformer):
|
|
881
901
|
|
882
902
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
883
903
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
884
|
-
Returns
|
904
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
885
905
|
"""
|
886
906
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
887
|
-
return []
|
907
|
+
return [output_cols_prefix]
|
888
908
|
|
889
909
|
classes = self._sklearn_object.classes_
|
890
910
|
if isinstance(classes, numpy.ndarray):
|
@@ -1109,7 +1129,7 @@ class Isomap(BaseTransformer):
|
|
1109
1129
|
cp.dump(self._sklearn_object, local_score_file)
|
1110
1130
|
|
1111
1131
|
# Create temp stage to run score.
|
1112
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1132
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1113
1133
|
session = dataset._session
|
1114
1134
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1115
1135
|
SqlResultValidator(
|
@@ -1123,8 +1143,9 @@ class Isomap(BaseTransformer):
|
|
1123
1143
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1124
1144
|
).validate()
|
1125
1145
|
|
1126
|
-
|
1127
|
-
|
1146
|
+
# Use posixpath to construct stage paths
|
1147
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1148
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1128
1149
|
statement_params = telemetry.get_function_usage_statement_params(
|
1129
1150
|
project=_PROJECT,
|
1130
1151
|
subproject=_SUBPROJECT,
|
@@ -1150,6 +1171,7 @@ class Isomap(BaseTransformer):
|
|
1150
1171
|
replace=True,
|
1151
1172
|
session=session,
|
1152
1173
|
statement_params=statement_params,
|
1174
|
+
anonymous=True
|
1153
1175
|
)
|
1154
1176
|
def score_wrapper_sproc(
|
1155
1177
|
session: Session,
|
@@ -1157,7 +1179,8 @@ class Isomap(BaseTransformer):
|
|
1157
1179
|
stage_score_file_name: str,
|
1158
1180
|
input_cols: List[str],
|
1159
1181
|
label_cols: List[str],
|
1160
|
-
sample_weight_col: Optional[str]
|
1182
|
+
sample_weight_col: Optional[str],
|
1183
|
+
statement_params: Dict[str, str]
|
1161
1184
|
) -> float:
|
1162
1185
|
import cloudpickle as cp
|
1163
1186
|
import numpy as np
|
@@ -1207,14 +1230,14 @@ class Isomap(BaseTransformer):
|
|
1207
1230
|
api_calls=[Session.call],
|
1208
1231
|
custom_tags=dict([("autogen", True)]),
|
1209
1232
|
)
|
1210
|
-
score =
|
1211
|
-
|
1233
|
+
score = score_wrapper_sproc(
|
1234
|
+
session,
|
1212
1235
|
query,
|
1213
1236
|
stage_score_file_name,
|
1214
1237
|
identifier.get_unescaped_names(self.input_cols),
|
1215
1238
|
identifier.get_unescaped_names(self.label_cols),
|
1216
1239
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1217
|
-
statement_params
|
1240
|
+
statement_params,
|
1218
1241
|
)
|
1219
1242
|
|
1220
1243
|
cleanup_temp_files([local_score_file_name])
|
@@ -1232,18 +1255,20 @@ class Isomap(BaseTransformer):
|
|
1232
1255
|
if self._sklearn_object._estimator_type == 'classifier':
|
1233
1256
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1234
1257
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1235
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1258
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1259
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1236
1260
|
# For regressor, the type of predict is float64
|
1237
1261
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1238
1262
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1239
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1240
|
-
|
1263
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1264
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1241
1265
|
for prob_func in PROB_FUNCTIONS:
|
1242
1266
|
if hasattr(self, prob_func):
|
1243
1267
|
output_cols_prefix: str = f"{prob_func}_"
|
1244
1268
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1245
1269
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1246
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1270
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1271
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1247
1272
|
|
1248
1273
|
@property
|
1249
1274
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -241,7 +243,6 @@ class MDS(BaseTransformer):
|
|
241
243
|
sample_weight_col: Optional[str] = None,
|
242
244
|
) -> None:
|
243
245
|
super().__init__()
|
244
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
245
246
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
246
247
|
|
247
248
|
self._deps = list(deps)
|
@@ -270,6 +271,15 @@ class MDS(BaseTransformer):
|
|
270
271
|
self.set_drop_input_cols(drop_input_cols)
|
271
272
|
self.set_sample_weight_col(sample_weight_col)
|
272
273
|
|
274
|
+
def _get_rand_id(self) -> str:
|
275
|
+
"""
|
276
|
+
Generate random id to be used in sproc and stage names.
|
277
|
+
|
278
|
+
Returns:
|
279
|
+
Random id string usable in sproc, table, and stage names.
|
280
|
+
"""
|
281
|
+
return str(uuid4()).replace("-", "_").upper()
|
282
|
+
|
273
283
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
274
284
|
"""
|
275
285
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -348,7 +358,7 @@ class MDS(BaseTransformer):
|
|
348
358
|
cp.dump(self._sklearn_object, local_transform_file)
|
349
359
|
|
350
360
|
# Create temp stage to run fit.
|
351
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
361
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
352
362
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
353
363
|
SqlResultValidator(
|
354
364
|
session=session,
|
@@ -361,11 +371,12 @@ class MDS(BaseTransformer):
|
|
361
371
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
362
372
|
).validate()
|
363
373
|
|
364
|
-
|
374
|
+
# Use posixpath to construct stage paths
|
375
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
376
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
365
377
|
local_result_file_name = get_temp_file_path()
|
366
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
367
378
|
|
368
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
379
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
369
380
|
statement_params = telemetry.get_function_usage_statement_params(
|
370
381
|
project=_PROJECT,
|
371
382
|
subproject=_SUBPROJECT,
|
@@ -391,6 +402,7 @@ class MDS(BaseTransformer):
|
|
391
402
|
replace=True,
|
392
403
|
session=session,
|
393
404
|
statement_params=statement_params,
|
405
|
+
anonymous=True
|
394
406
|
)
|
395
407
|
def fit_wrapper_sproc(
|
396
408
|
session: Session,
|
@@ -399,7 +411,8 @@ class MDS(BaseTransformer):
|
|
399
411
|
stage_result_file_name: str,
|
400
412
|
input_cols: List[str],
|
401
413
|
label_cols: List[str],
|
402
|
-
sample_weight_col: Optional[str]
|
414
|
+
sample_weight_col: Optional[str],
|
415
|
+
statement_params: Dict[str, str]
|
403
416
|
) -> str:
|
404
417
|
import cloudpickle as cp
|
405
418
|
import numpy as np
|
@@ -466,15 +479,15 @@ class MDS(BaseTransformer):
|
|
466
479
|
api_calls=[Session.call],
|
467
480
|
custom_tags=dict([("autogen", True)]),
|
468
481
|
)
|
469
|
-
sproc_export_file_name =
|
470
|
-
|
482
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
483
|
+
session,
|
471
484
|
query,
|
472
485
|
stage_transform_file_name,
|
473
486
|
stage_result_file_name,
|
474
487
|
identifier.get_unescaped_names(self.input_cols),
|
475
488
|
identifier.get_unescaped_names(self.label_cols),
|
476
489
|
identifier.get_unescaped_names(self.sample_weight_col),
|
477
|
-
statement_params
|
490
|
+
statement_params,
|
478
491
|
)
|
479
492
|
|
480
493
|
if "|" in sproc_export_file_name:
|
@@ -484,7 +497,7 @@ class MDS(BaseTransformer):
|
|
484
497
|
print("\n".join(fields[1:]))
|
485
498
|
|
486
499
|
session.file.get(
|
487
|
-
|
500
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
488
501
|
local_result_file_name,
|
489
502
|
statement_params=statement_params
|
490
503
|
)
|
@@ -530,7 +543,7 @@ class MDS(BaseTransformer):
|
|
530
543
|
|
531
544
|
# Register vectorized UDF for batch inference
|
532
545
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
533
|
-
safe_id=self.
|
546
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
534
547
|
|
535
548
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
536
549
|
# will try to pickle all of self which fails.
|
@@ -622,7 +635,7 @@ class MDS(BaseTransformer):
|
|
622
635
|
return transformed_pandas_df.to_dict("records")
|
623
636
|
|
624
637
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
625
|
-
safe_id=self.
|
638
|
+
safe_id=self._get_rand_id()
|
626
639
|
)
|
627
640
|
|
628
641
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -787,11 +800,18 @@ class MDS(BaseTransformer):
|
|
787
800
|
Transformed dataset.
|
788
801
|
"""
|
789
802
|
if isinstance(dataset, DataFrame):
|
803
|
+
expected_type_inferred = ""
|
804
|
+
# when it is classifier, infer the datatype from label columns
|
805
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
806
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
807
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
808
|
+
)
|
809
|
+
|
790
810
|
output_df = self._batch_inference(
|
791
811
|
dataset=dataset,
|
792
812
|
inference_method="predict",
|
793
813
|
expected_output_cols_list=self.output_cols,
|
794
|
-
expected_output_cols_type=
|
814
|
+
expected_output_cols_type=expected_type_inferred,
|
795
815
|
)
|
796
816
|
elif isinstance(dataset, pd.DataFrame):
|
797
817
|
output_df = self._sklearn_inference(
|
@@ -862,10 +882,10 @@ class MDS(BaseTransformer):
|
|
862
882
|
|
863
883
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
864
884
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
865
|
-
Returns
|
885
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
866
886
|
"""
|
867
887
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
868
|
-
return []
|
888
|
+
return [output_cols_prefix]
|
869
889
|
|
870
890
|
classes = self._sklearn_object.classes_
|
871
891
|
if isinstance(classes, numpy.ndarray):
|
@@ -1090,7 +1110,7 @@ class MDS(BaseTransformer):
|
|
1090
1110
|
cp.dump(self._sklearn_object, local_score_file)
|
1091
1111
|
|
1092
1112
|
# Create temp stage to run score.
|
1093
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1113
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1094
1114
|
session = dataset._session
|
1095
1115
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1096
1116
|
SqlResultValidator(
|
@@ -1104,8 +1124,9 @@ class MDS(BaseTransformer):
|
|
1104
1124
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1105
1125
|
).validate()
|
1106
1126
|
|
1107
|
-
|
1108
|
-
|
1127
|
+
# Use posixpath to construct stage paths
|
1128
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1129
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1109
1130
|
statement_params = telemetry.get_function_usage_statement_params(
|
1110
1131
|
project=_PROJECT,
|
1111
1132
|
subproject=_SUBPROJECT,
|
@@ -1131,6 +1152,7 @@ class MDS(BaseTransformer):
|
|
1131
1152
|
replace=True,
|
1132
1153
|
session=session,
|
1133
1154
|
statement_params=statement_params,
|
1155
|
+
anonymous=True
|
1134
1156
|
)
|
1135
1157
|
def score_wrapper_sproc(
|
1136
1158
|
session: Session,
|
@@ -1138,7 +1160,8 @@ class MDS(BaseTransformer):
|
|
1138
1160
|
stage_score_file_name: str,
|
1139
1161
|
input_cols: List[str],
|
1140
1162
|
label_cols: List[str],
|
1141
|
-
sample_weight_col: Optional[str]
|
1163
|
+
sample_weight_col: Optional[str],
|
1164
|
+
statement_params: Dict[str, str]
|
1142
1165
|
) -> float:
|
1143
1166
|
import cloudpickle as cp
|
1144
1167
|
import numpy as np
|
@@ -1188,14 +1211,14 @@ class MDS(BaseTransformer):
|
|
1188
1211
|
api_calls=[Session.call],
|
1189
1212
|
custom_tags=dict([("autogen", True)]),
|
1190
1213
|
)
|
1191
|
-
score =
|
1192
|
-
|
1214
|
+
score = score_wrapper_sproc(
|
1215
|
+
session,
|
1193
1216
|
query,
|
1194
1217
|
stage_score_file_name,
|
1195
1218
|
identifier.get_unescaped_names(self.input_cols),
|
1196
1219
|
identifier.get_unescaped_names(self.label_cols),
|
1197
1220
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1198
|
-
statement_params
|
1221
|
+
statement_params,
|
1199
1222
|
)
|
1200
1223
|
|
1201
1224
|
cleanup_temp_files([local_score_file_name])
|
@@ -1213,18 +1236,20 @@ class MDS(BaseTransformer):
|
|
1213
1236
|
if self._sklearn_object._estimator_type == 'classifier':
|
1214
1237
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1215
1238
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1216
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1239
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1240
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1217
1241
|
# For regressor, the type of predict is float64
|
1218
1242
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1219
1243
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1220
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1221
|
-
|
1244
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1245
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1222
1246
|
for prob_func in PROB_FUNCTIONS:
|
1223
1247
|
if hasattr(self, prob_func):
|
1224
1248
|
output_cols_prefix: str = f"{prob_func}_"
|
1225
1249
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1226
1250
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1227
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1251
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1252
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1228
1253
|
|
1229
1254
|
@property
|
1230
1255
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|