snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -218,7 +220,6 @@ class LabelSpreading(BaseTransformer):
|
|
218
220
|
sample_weight_col: Optional[str] = None,
|
219
221
|
) -> None:
|
220
222
|
super().__init__()
|
221
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
222
223
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
223
224
|
|
224
225
|
self._deps = list(deps)
|
@@ -244,6 +245,15 @@ class LabelSpreading(BaseTransformer):
|
|
244
245
|
self.set_drop_input_cols(drop_input_cols)
|
245
246
|
self.set_sample_weight_col(sample_weight_col)
|
246
247
|
|
248
|
+
def _get_rand_id(self) -> str:
|
249
|
+
"""
|
250
|
+
Generate random id to be used in sproc and stage names.
|
251
|
+
|
252
|
+
Returns:
|
253
|
+
Random id string usable in sproc, table, and stage names.
|
254
|
+
"""
|
255
|
+
return str(uuid4()).replace("-", "_").upper()
|
256
|
+
|
247
257
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
248
258
|
"""
|
249
259
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -322,7 +332,7 @@ class LabelSpreading(BaseTransformer):
|
|
322
332
|
cp.dump(self._sklearn_object, local_transform_file)
|
323
333
|
|
324
334
|
# Create temp stage to run fit.
|
325
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
335
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
326
336
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
327
337
|
SqlResultValidator(
|
328
338
|
session=session,
|
@@ -335,11 +345,12 @@ class LabelSpreading(BaseTransformer):
|
|
335
345
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
336
346
|
).validate()
|
337
347
|
|
338
|
-
|
348
|
+
# Use posixpath to construct stage paths
|
349
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
350
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
339
351
|
local_result_file_name = get_temp_file_path()
|
340
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
341
352
|
|
342
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
353
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
343
354
|
statement_params = telemetry.get_function_usage_statement_params(
|
344
355
|
project=_PROJECT,
|
345
356
|
subproject=_SUBPROJECT,
|
@@ -365,6 +376,7 @@ class LabelSpreading(BaseTransformer):
|
|
365
376
|
replace=True,
|
366
377
|
session=session,
|
367
378
|
statement_params=statement_params,
|
379
|
+
anonymous=True
|
368
380
|
)
|
369
381
|
def fit_wrapper_sproc(
|
370
382
|
session: Session,
|
@@ -373,7 +385,8 @@ class LabelSpreading(BaseTransformer):
|
|
373
385
|
stage_result_file_name: str,
|
374
386
|
input_cols: List[str],
|
375
387
|
label_cols: List[str],
|
376
|
-
sample_weight_col: Optional[str]
|
388
|
+
sample_weight_col: Optional[str],
|
389
|
+
statement_params: Dict[str, str]
|
377
390
|
) -> str:
|
378
391
|
import cloudpickle as cp
|
379
392
|
import numpy as np
|
@@ -440,15 +453,15 @@ class LabelSpreading(BaseTransformer):
|
|
440
453
|
api_calls=[Session.call],
|
441
454
|
custom_tags=dict([("autogen", True)]),
|
442
455
|
)
|
443
|
-
sproc_export_file_name =
|
444
|
-
|
456
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
457
|
+
session,
|
445
458
|
query,
|
446
459
|
stage_transform_file_name,
|
447
460
|
stage_result_file_name,
|
448
461
|
identifier.get_unescaped_names(self.input_cols),
|
449
462
|
identifier.get_unescaped_names(self.label_cols),
|
450
463
|
identifier.get_unescaped_names(self.sample_weight_col),
|
451
|
-
statement_params
|
464
|
+
statement_params,
|
452
465
|
)
|
453
466
|
|
454
467
|
if "|" in sproc_export_file_name:
|
@@ -458,7 +471,7 @@ class LabelSpreading(BaseTransformer):
|
|
458
471
|
print("\n".join(fields[1:]))
|
459
472
|
|
460
473
|
session.file.get(
|
461
|
-
|
474
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
462
475
|
local_result_file_name,
|
463
476
|
statement_params=statement_params
|
464
477
|
)
|
@@ -504,7 +517,7 @@ class LabelSpreading(BaseTransformer):
|
|
504
517
|
|
505
518
|
# Register vectorized UDF for batch inference
|
506
519
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
507
|
-
safe_id=self.
|
520
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
508
521
|
|
509
522
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
510
523
|
# will try to pickle all of self which fails.
|
@@ -596,7 +609,7 @@ class LabelSpreading(BaseTransformer):
|
|
596
609
|
return transformed_pandas_df.to_dict("records")
|
597
610
|
|
598
611
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
599
|
-
safe_id=self.
|
612
|
+
safe_id=self._get_rand_id()
|
600
613
|
)
|
601
614
|
|
602
615
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -763,11 +776,18 @@ class LabelSpreading(BaseTransformer):
|
|
763
776
|
Transformed dataset.
|
764
777
|
"""
|
765
778
|
if isinstance(dataset, DataFrame):
|
779
|
+
expected_type_inferred = ""
|
780
|
+
# when it is classifier, infer the datatype from label columns
|
781
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
782
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
783
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
784
|
+
)
|
785
|
+
|
766
786
|
output_df = self._batch_inference(
|
767
787
|
dataset=dataset,
|
768
788
|
inference_method="predict",
|
769
789
|
expected_output_cols_list=self.output_cols,
|
770
|
-
expected_output_cols_type=
|
790
|
+
expected_output_cols_type=expected_type_inferred,
|
771
791
|
)
|
772
792
|
elif isinstance(dataset, pd.DataFrame):
|
773
793
|
output_df = self._sklearn_inference(
|
@@ -838,10 +858,10 @@ class LabelSpreading(BaseTransformer):
|
|
838
858
|
|
839
859
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
840
860
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
841
|
-
Returns
|
861
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
842
862
|
"""
|
843
863
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
844
|
-
return []
|
864
|
+
return [output_cols_prefix]
|
845
865
|
|
846
866
|
classes = self._sklearn_object.classes_
|
847
867
|
if isinstance(classes, numpy.ndarray):
|
@@ -1070,7 +1090,7 @@ class LabelSpreading(BaseTransformer):
|
|
1070
1090
|
cp.dump(self._sklearn_object, local_score_file)
|
1071
1091
|
|
1072
1092
|
# Create temp stage to run score.
|
1073
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1093
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1074
1094
|
session = dataset._session
|
1075
1095
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1076
1096
|
SqlResultValidator(
|
@@ -1084,8 +1104,9 @@ class LabelSpreading(BaseTransformer):
|
|
1084
1104
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1085
1105
|
).validate()
|
1086
1106
|
|
1087
|
-
|
1088
|
-
|
1107
|
+
# Use posixpath to construct stage paths
|
1108
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1109
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1089
1110
|
statement_params = telemetry.get_function_usage_statement_params(
|
1090
1111
|
project=_PROJECT,
|
1091
1112
|
subproject=_SUBPROJECT,
|
@@ -1111,6 +1132,7 @@ class LabelSpreading(BaseTransformer):
|
|
1111
1132
|
replace=True,
|
1112
1133
|
session=session,
|
1113
1134
|
statement_params=statement_params,
|
1135
|
+
anonymous=True
|
1114
1136
|
)
|
1115
1137
|
def score_wrapper_sproc(
|
1116
1138
|
session: Session,
|
@@ -1118,7 +1140,8 @@ class LabelSpreading(BaseTransformer):
|
|
1118
1140
|
stage_score_file_name: str,
|
1119
1141
|
input_cols: List[str],
|
1120
1142
|
label_cols: List[str],
|
1121
|
-
sample_weight_col: Optional[str]
|
1143
|
+
sample_weight_col: Optional[str],
|
1144
|
+
statement_params: Dict[str, str]
|
1122
1145
|
) -> float:
|
1123
1146
|
import cloudpickle as cp
|
1124
1147
|
import numpy as np
|
@@ -1168,14 +1191,14 @@ class LabelSpreading(BaseTransformer):
|
|
1168
1191
|
api_calls=[Session.call],
|
1169
1192
|
custom_tags=dict([("autogen", True)]),
|
1170
1193
|
)
|
1171
|
-
score =
|
1172
|
-
|
1194
|
+
score = score_wrapper_sproc(
|
1195
|
+
session,
|
1173
1196
|
query,
|
1174
1197
|
stage_score_file_name,
|
1175
1198
|
identifier.get_unescaped_names(self.input_cols),
|
1176
1199
|
identifier.get_unescaped_names(self.label_cols),
|
1177
1200
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1178
|
-
statement_params
|
1201
|
+
statement_params,
|
1179
1202
|
)
|
1180
1203
|
|
1181
1204
|
cleanup_temp_files([local_score_file_name])
|
@@ -1193,18 +1216,20 @@ class LabelSpreading(BaseTransformer):
|
|
1193
1216
|
if self._sklearn_object._estimator_type == 'classifier':
|
1194
1217
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1195
1218
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1196
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1219
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1220
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1197
1221
|
# For regressor, the type of predict is float64
|
1198
1222
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1199
1223
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1200
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1201
|
-
|
1224
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1225
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1202
1226
|
for prob_func in PROB_FUNCTIONS:
|
1203
1227
|
if hasattr(self, prob_func):
|
1204
1228
|
output_cols_prefix: str = f"{prob_func}_"
|
1205
1229
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1206
1230
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1207
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1231
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1232
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1208
1233
|
|
1209
1234
|
@property
|
1210
1235
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -264,7 +266,6 @@ class LinearSVC(BaseTransformer):
|
|
264
266
|
sample_weight_col: Optional[str] = None,
|
265
267
|
) -> None:
|
266
268
|
super().__init__()
|
267
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
268
269
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
269
270
|
|
270
271
|
self._deps = list(deps)
|
@@ -295,6 +296,15 @@ class LinearSVC(BaseTransformer):
|
|
295
296
|
self.set_drop_input_cols(drop_input_cols)
|
296
297
|
self.set_sample_weight_col(sample_weight_col)
|
297
298
|
|
299
|
+
def _get_rand_id(self) -> str:
|
300
|
+
"""
|
301
|
+
Generate random id to be used in sproc and stage names.
|
302
|
+
|
303
|
+
Returns:
|
304
|
+
Random id string usable in sproc, table, and stage names.
|
305
|
+
"""
|
306
|
+
return str(uuid4()).replace("-", "_").upper()
|
307
|
+
|
298
308
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
299
309
|
"""
|
300
310
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -373,7 +383,7 @@ class LinearSVC(BaseTransformer):
|
|
373
383
|
cp.dump(self._sklearn_object, local_transform_file)
|
374
384
|
|
375
385
|
# Create temp stage to run fit.
|
376
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
386
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
377
387
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
378
388
|
SqlResultValidator(
|
379
389
|
session=session,
|
@@ -386,11 +396,12 @@ class LinearSVC(BaseTransformer):
|
|
386
396
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
387
397
|
).validate()
|
388
398
|
|
389
|
-
|
399
|
+
# Use posixpath to construct stage paths
|
400
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
401
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
390
402
|
local_result_file_name = get_temp_file_path()
|
391
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
392
403
|
|
393
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
404
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
394
405
|
statement_params = telemetry.get_function_usage_statement_params(
|
395
406
|
project=_PROJECT,
|
396
407
|
subproject=_SUBPROJECT,
|
@@ -416,6 +427,7 @@ class LinearSVC(BaseTransformer):
|
|
416
427
|
replace=True,
|
417
428
|
session=session,
|
418
429
|
statement_params=statement_params,
|
430
|
+
anonymous=True
|
419
431
|
)
|
420
432
|
def fit_wrapper_sproc(
|
421
433
|
session: Session,
|
@@ -424,7 +436,8 @@ class LinearSVC(BaseTransformer):
|
|
424
436
|
stage_result_file_name: str,
|
425
437
|
input_cols: List[str],
|
426
438
|
label_cols: List[str],
|
427
|
-
sample_weight_col: Optional[str]
|
439
|
+
sample_weight_col: Optional[str],
|
440
|
+
statement_params: Dict[str, str]
|
428
441
|
) -> str:
|
429
442
|
import cloudpickle as cp
|
430
443
|
import numpy as np
|
@@ -491,15 +504,15 @@ class LinearSVC(BaseTransformer):
|
|
491
504
|
api_calls=[Session.call],
|
492
505
|
custom_tags=dict([("autogen", True)]),
|
493
506
|
)
|
494
|
-
sproc_export_file_name =
|
495
|
-
|
507
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
508
|
+
session,
|
496
509
|
query,
|
497
510
|
stage_transform_file_name,
|
498
511
|
stage_result_file_name,
|
499
512
|
identifier.get_unescaped_names(self.input_cols),
|
500
513
|
identifier.get_unescaped_names(self.label_cols),
|
501
514
|
identifier.get_unescaped_names(self.sample_weight_col),
|
502
|
-
statement_params
|
515
|
+
statement_params,
|
503
516
|
)
|
504
517
|
|
505
518
|
if "|" in sproc_export_file_name:
|
@@ -509,7 +522,7 @@ class LinearSVC(BaseTransformer):
|
|
509
522
|
print("\n".join(fields[1:]))
|
510
523
|
|
511
524
|
session.file.get(
|
512
|
-
|
525
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
513
526
|
local_result_file_name,
|
514
527
|
statement_params=statement_params
|
515
528
|
)
|
@@ -555,7 +568,7 @@ class LinearSVC(BaseTransformer):
|
|
555
568
|
|
556
569
|
# Register vectorized UDF for batch inference
|
557
570
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
558
|
-
safe_id=self.
|
571
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
559
572
|
|
560
573
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
561
574
|
# will try to pickle all of self which fails.
|
@@ -647,7 +660,7 @@ class LinearSVC(BaseTransformer):
|
|
647
660
|
return transformed_pandas_df.to_dict("records")
|
648
661
|
|
649
662
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
650
|
-
safe_id=self.
|
663
|
+
safe_id=self._get_rand_id()
|
651
664
|
)
|
652
665
|
|
653
666
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -814,11 +827,18 @@ class LinearSVC(BaseTransformer):
|
|
814
827
|
Transformed dataset.
|
815
828
|
"""
|
816
829
|
if isinstance(dataset, DataFrame):
|
830
|
+
expected_type_inferred = ""
|
831
|
+
# when it is classifier, infer the datatype from label columns
|
832
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
833
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
834
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
835
|
+
)
|
836
|
+
|
817
837
|
output_df = self._batch_inference(
|
818
838
|
dataset=dataset,
|
819
839
|
inference_method="predict",
|
820
840
|
expected_output_cols_list=self.output_cols,
|
821
|
-
expected_output_cols_type=
|
841
|
+
expected_output_cols_type=expected_type_inferred,
|
822
842
|
)
|
823
843
|
elif isinstance(dataset, pd.DataFrame):
|
824
844
|
output_df = self._sklearn_inference(
|
@@ -889,10 +909,10 @@ class LinearSVC(BaseTransformer):
|
|
889
909
|
|
890
910
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
891
911
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
892
|
-
Returns
|
912
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
893
913
|
"""
|
894
914
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
895
|
-
return []
|
915
|
+
return [output_cols_prefix]
|
896
916
|
|
897
917
|
classes = self._sklearn_object.classes_
|
898
918
|
if isinstance(classes, numpy.ndarray):
|
@@ -1119,7 +1139,7 @@ class LinearSVC(BaseTransformer):
|
|
1119
1139
|
cp.dump(self._sklearn_object, local_score_file)
|
1120
1140
|
|
1121
1141
|
# Create temp stage to run score.
|
1122
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1142
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1123
1143
|
session = dataset._session
|
1124
1144
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1125
1145
|
SqlResultValidator(
|
@@ -1133,8 +1153,9 @@ class LinearSVC(BaseTransformer):
|
|
1133
1153
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1134
1154
|
).validate()
|
1135
1155
|
|
1136
|
-
|
1137
|
-
|
1156
|
+
# Use posixpath to construct stage paths
|
1157
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1158
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1138
1159
|
statement_params = telemetry.get_function_usage_statement_params(
|
1139
1160
|
project=_PROJECT,
|
1140
1161
|
subproject=_SUBPROJECT,
|
@@ -1160,6 +1181,7 @@ class LinearSVC(BaseTransformer):
|
|
1160
1181
|
replace=True,
|
1161
1182
|
session=session,
|
1162
1183
|
statement_params=statement_params,
|
1184
|
+
anonymous=True
|
1163
1185
|
)
|
1164
1186
|
def score_wrapper_sproc(
|
1165
1187
|
session: Session,
|
@@ -1167,7 +1189,8 @@ class LinearSVC(BaseTransformer):
|
|
1167
1189
|
stage_score_file_name: str,
|
1168
1190
|
input_cols: List[str],
|
1169
1191
|
label_cols: List[str],
|
1170
|
-
sample_weight_col: Optional[str]
|
1192
|
+
sample_weight_col: Optional[str],
|
1193
|
+
statement_params: Dict[str, str]
|
1171
1194
|
) -> float:
|
1172
1195
|
import cloudpickle as cp
|
1173
1196
|
import numpy as np
|
@@ -1217,14 +1240,14 @@ class LinearSVC(BaseTransformer):
|
|
1217
1240
|
api_calls=[Session.call],
|
1218
1241
|
custom_tags=dict([("autogen", True)]),
|
1219
1242
|
)
|
1220
|
-
score =
|
1221
|
-
|
1243
|
+
score = score_wrapper_sproc(
|
1244
|
+
session,
|
1222
1245
|
query,
|
1223
1246
|
stage_score_file_name,
|
1224
1247
|
identifier.get_unescaped_names(self.input_cols),
|
1225
1248
|
identifier.get_unescaped_names(self.label_cols),
|
1226
1249
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1227
|
-
statement_params
|
1250
|
+
statement_params,
|
1228
1251
|
)
|
1229
1252
|
|
1230
1253
|
cleanup_temp_files([local_score_file_name])
|
@@ -1242,18 +1265,20 @@ class LinearSVC(BaseTransformer):
|
|
1242
1265
|
if self._sklearn_object._estimator_type == 'classifier':
|
1243
1266
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1244
1267
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1245
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1268
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1269
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1246
1270
|
# For regressor, the type of predict is float64
|
1247
1271
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1248
1272
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1249
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1250
|
-
|
1273
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1274
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1251
1275
|
for prob_func in PROB_FUNCTIONS:
|
1252
1276
|
if hasattr(self, prob_func):
|
1253
1277
|
output_cols_prefix: str = f"{prob_func}_"
|
1254
1278
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1255
1279
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1256
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1280
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1281
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1257
1282
|
|
1258
1283
|
@property
|
1259
1284
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|