snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -208,7 +210,6 @@ class PolynomialFeatures(BaseTransformer):
|
|
208
210
|
sample_weight_col: Optional[str] = None,
|
209
211
|
) -> None:
|
210
212
|
super().__init__()
|
211
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
212
213
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
213
214
|
|
214
215
|
self._deps = list(deps)
|
@@ -231,6 +232,15 @@ class PolynomialFeatures(BaseTransformer):
|
|
231
232
|
self.set_drop_input_cols(drop_input_cols)
|
232
233
|
self.set_sample_weight_col(sample_weight_col)
|
233
234
|
|
235
|
+
def _get_rand_id(self) -> str:
|
236
|
+
"""
|
237
|
+
Generate random id to be used in sproc and stage names.
|
238
|
+
|
239
|
+
Returns:
|
240
|
+
Random id string usable in sproc, table, and stage names.
|
241
|
+
"""
|
242
|
+
return str(uuid4()).replace("-", "_").upper()
|
243
|
+
|
234
244
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
235
245
|
"""
|
236
246
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -309,7 +319,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
309
319
|
cp.dump(self._sklearn_object, local_transform_file)
|
310
320
|
|
311
321
|
# Create temp stage to run fit.
|
312
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
322
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
313
323
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
314
324
|
SqlResultValidator(
|
315
325
|
session=session,
|
@@ -322,11 +332,12 @@ class PolynomialFeatures(BaseTransformer):
|
|
322
332
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
323
333
|
).validate()
|
324
334
|
|
325
|
-
|
335
|
+
# Use posixpath to construct stage paths
|
336
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
337
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
326
338
|
local_result_file_name = get_temp_file_path()
|
327
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
328
339
|
|
329
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
340
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
330
341
|
statement_params = telemetry.get_function_usage_statement_params(
|
331
342
|
project=_PROJECT,
|
332
343
|
subproject=_SUBPROJECT,
|
@@ -352,6 +363,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
352
363
|
replace=True,
|
353
364
|
session=session,
|
354
365
|
statement_params=statement_params,
|
366
|
+
anonymous=True
|
355
367
|
)
|
356
368
|
def fit_wrapper_sproc(
|
357
369
|
session: Session,
|
@@ -360,7 +372,8 @@ class PolynomialFeatures(BaseTransformer):
|
|
360
372
|
stage_result_file_name: str,
|
361
373
|
input_cols: List[str],
|
362
374
|
label_cols: List[str],
|
363
|
-
sample_weight_col: Optional[str]
|
375
|
+
sample_weight_col: Optional[str],
|
376
|
+
statement_params: Dict[str, str]
|
364
377
|
) -> str:
|
365
378
|
import cloudpickle as cp
|
366
379
|
import numpy as np
|
@@ -427,15 +440,15 @@ class PolynomialFeatures(BaseTransformer):
|
|
427
440
|
api_calls=[Session.call],
|
428
441
|
custom_tags=dict([("autogen", True)]),
|
429
442
|
)
|
430
|
-
sproc_export_file_name =
|
431
|
-
|
443
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
444
|
+
session,
|
432
445
|
query,
|
433
446
|
stage_transform_file_name,
|
434
447
|
stage_result_file_name,
|
435
448
|
identifier.get_unescaped_names(self.input_cols),
|
436
449
|
identifier.get_unescaped_names(self.label_cols),
|
437
450
|
identifier.get_unescaped_names(self.sample_weight_col),
|
438
|
-
statement_params
|
451
|
+
statement_params,
|
439
452
|
)
|
440
453
|
|
441
454
|
if "|" in sproc_export_file_name:
|
@@ -445,7 +458,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
445
458
|
print("\n".join(fields[1:]))
|
446
459
|
|
447
460
|
session.file.get(
|
448
|
-
|
461
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
449
462
|
local_result_file_name,
|
450
463
|
statement_params=statement_params
|
451
464
|
)
|
@@ -491,7 +504,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
491
504
|
|
492
505
|
# Register vectorized UDF for batch inference
|
493
506
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
494
|
-
safe_id=self.
|
507
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
495
508
|
|
496
509
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
497
510
|
# will try to pickle all of self which fails.
|
@@ -583,7 +596,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
583
596
|
return transformed_pandas_df.to_dict("records")
|
584
597
|
|
585
598
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
586
|
-
safe_id=self.
|
599
|
+
safe_id=self._get_rand_id()
|
587
600
|
)
|
588
601
|
|
589
602
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -748,11 +761,18 @@ class PolynomialFeatures(BaseTransformer):
|
|
748
761
|
Transformed dataset.
|
749
762
|
"""
|
750
763
|
if isinstance(dataset, DataFrame):
|
764
|
+
expected_type_inferred = ""
|
765
|
+
# when it is classifier, infer the datatype from label columns
|
766
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
767
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
768
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
769
|
+
)
|
770
|
+
|
751
771
|
output_df = self._batch_inference(
|
752
772
|
dataset=dataset,
|
753
773
|
inference_method="predict",
|
754
774
|
expected_output_cols_list=self.output_cols,
|
755
|
-
expected_output_cols_type=
|
775
|
+
expected_output_cols_type=expected_type_inferred,
|
756
776
|
)
|
757
777
|
elif isinstance(dataset, pd.DataFrame):
|
758
778
|
output_df = self._sklearn_inference(
|
@@ -825,10 +845,10 @@ class PolynomialFeatures(BaseTransformer):
|
|
825
845
|
|
826
846
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
827
847
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
828
|
-
Returns
|
848
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
829
849
|
"""
|
830
850
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
831
|
-
return []
|
851
|
+
return [output_cols_prefix]
|
832
852
|
|
833
853
|
classes = self._sklearn_object.classes_
|
834
854
|
if isinstance(classes, numpy.ndarray):
|
@@ -1053,7 +1073,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
1053
1073
|
cp.dump(self._sklearn_object, local_score_file)
|
1054
1074
|
|
1055
1075
|
# Create temp stage to run score.
|
1056
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1076
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1057
1077
|
session = dataset._session
|
1058
1078
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1059
1079
|
SqlResultValidator(
|
@@ -1067,8 +1087,9 @@ class PolynomialFeatures(BaseTransformer):
|
|
1067
1087
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1068
1088
|
).validate()
|
1069
1089
|
|
1070
|
-
|
1071
|
-
|
1090
|
+
# Use posixpath to construct stage paths
|
1091
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1092
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1072
1093
|
statement_params = telemetry.get_function_usage_statement_params(
|
1073
1094
|
project=_PROJECT,
|
1074
1095
|
subproject=_SUBPROJECT,
|
@@ -1094,6 +1115,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
1094
1115
|
replace=True,
|
1095
1116
|
session=session,
|
1096
1117
|
statement_params=statement_params,
|
1118
|
+
anonymous=True
|
1097
1119
|
)
|
1098
1120
|
def score_wrapper_sproc(
|
1099
1121
|
session: Session,
|
@@ -1101,7 +1123,8 @@ class PolynomialFeatures(BaseTransformer):
|
|
1101
1123
|
stage_score_file_name: str,
|
1102
1124
|
input_cols: List[str],
|
1103
1125
|
label_cols: List[str],
|
1104
|
-
sample_weight_col: Optional[str]
|
1126
|
+
sample_weight_col: Optional[str],
|
1127
|
+
statement_params: Dict[str, str]
|
1105
1128
|
) -> float:
|
1106
1129
|
import cloudpickle as cp
|
1107
1130
|
import numpy as np
|
@@ -1151,14 +1174,14 @@ class PolynomialFeatures(BaseTransformer):
|
|
1151
1174
|
api_calls=[Session.call],
|
1152
1175
|
custom_tags=dict([("autogen", True)]),
|
1153
1176
|
)
|
1154
|
-
score =
|
1155
|
-
|
1177
|
+
score = score_wrapper_sproc(
|
1178
|
+
session,
|
1156
1179
|
query,
|
1157
1180
|
stage_score_file_name,
|
1158
1181
|
identifier.get_unescaped_names(self.input_cols),
|
1159
1182
|
identifier.get_unescaped_names(self.label_cols),
|
1160
1183
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1161
|
-
statement_params
|
1184
|
+
statement_params,
|
1162
1185
|
)
|
1163
1186
|
|
1164
1187
|
cleanup_temp_files([local_score_file_name])
|
@@ -1176,18 +1199,20 @@ class PolynomialFeatures(BaseTransformer):
|
|
1176
1199
|
if self._sklearn_object._estimator_type == 'classifier':
|
1177
1200
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1178
1201
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1179
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1202
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1203
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1180
1204
|
# For regressor, the type of predict is float64
|
1181
1205
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1182
1206
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1183
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1184
|
-
|
1207
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1208
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1185
1209
|
for prob_func in PROB_FUNCTIONS:
|
1186
1210
|
if hasattr(self, prob_func):
|
1187
1211
|
output_cols_prefix: str = f"{prob_func}_"
|
1188
1212
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1189
1213
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1190
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1214
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1215
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1191
1216
|
|
1192
1217
|
@property
|
1193
1218
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -210,7 +212,6 @@ class LabelPropagation(BaseTransformer):
|
|
210
212
|
sample_weight_col: Optional[str] = None,
|
211
213
|
) -> None:
|
212
214
|
super().__init__()
|
213
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
214
215
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
215
216
|
|
216
217
|
self._deps = list(deps)
|
@@ -235,6 +236,15 @@ class LabelPropagation(BaseTransformer):
|
|
235
236
|
self.set_drop_input_cols(drop_input_cols)
|
236
237
|
self.set_sample_weight_col(sample_weight_col)
|
237
238
|
|
239
|
+
def _get_rand_id(self) -> str:
|
240
|
+
"""
|
241
|
+
Generate random id to be used in sproc and stage names.
|
242
|
+
|
243
|
+
Returns:
|
244
|
+
Random id string usable in sproc, table, and stage names.
|
245
|
+
"""
|
246
|
+
return str(uuid4()).replace("-", "_").upper()
|
247
|
+
|
238
248
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
239
249
|
"""
|
240
250
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -313,7 +323,7 @@ class LabelPropagation(BaseTransformer):
|
|
313
323
|
cp.dump(self._sklearn_object, local_transform_file)
|
314
324
|
|
315
325
|
# Create temp stage to run fit.
|
316
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
326
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
317
327
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
318
328
|
SqlResultValidator(
|
319
329
|
session=session,
|
@@ -326,11 +336,12 @@ class LabelPropagation(BaseTransformer):
|
|
326
336
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
327
337
|
).validate()
|
328
338
|
|
329
|
-
|
339
|
+
# Use posixpath to construct stage paths
|
340
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
341
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
330
342
|
local_result_file_name = get_temp_file_path()
|
331
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
332
343
|
|
333
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
344
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
334
345
|
statement_params = telemetry.get_function_usage_statement_params(
|
335
346
|
project=_PROJECT,
|
336
347
|
subproject=_SUBPROJECT,
|
@@ -356,6 +367,7 @@ class LabelPropagation(BaseTransformer):
|
|
356
367
|
replace=True,
|
357
368
|
session=session,
|
358
369
|
statement_params=statement_params,
|
370
|
+
anonymous=True
|
359
371
|
)
|
360
372
|
def fit_wrapper_sproc(
|
361
373
|
session: Session,
|
@@ -364,7 +376,8 @@ class LabelPropagation(BaseTransformer):
|
|
364
376
|
stage_result_file_name: str,
|
365
377
|
input_cols: List[str],
|
366
378
|
label_cols: List[str],
|
367
|
-
sample_weight_col: Optional[str]
|
379
|
+
sample_weight_col: Optional[str],
|
380
|
+
statement_params: Dict[str, str]
|
368
381
|
) -> str:
|
369
382
|
import cloudpickle as cp
|
370
383
|
import numpy as np
|
@@ -431,15 +444,15 @@ class LabelPropagation(BaseTransformer):
|
|
431
444
|
api_calls=[Session.call],
|
432
445
|
custom_tags=dict([("autogen", True)]),
|
433
446
|
)
|
434
|
-
sproc_export_file_name =
|
435
|
-
|
447
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
448
|
+
session,
|
436
449
|
query,
|
437
450
|
stage_transform_file_name,
|
438
451
|
stage_result_file_name,
|
439
452
|
identifier.get_unescaped_names(self.input_cols),
|
440
453
|
identifier.get_unescaped_names(self.label_cols),
|
441
454
|
identifier.get_unescaped_names(self.sample_weight_col),
|
442
|
-
statement_params
|
455
|
+
statement_params,
|
443
456
|
)
|
444
457
|
|
445
458
|
if "|" in sproc_export_file_name:
|
@@ -449,7 +462,7 @@ class LabelPropagation(BaseTransformer):
|
|
449
462
|
print("\n".join(fields[1:]))
|
450
463
|
|
451
464
|
session.file.get(
|
452
|
-
|
465
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
453
466
|
local_result_file_name,
|
454
467
|
statement_params=statement_params
|
455
468
|
)
|
@@ -495,7 +508,7 @@ class LabelPropagation(BaseTransformer):
|
|
495
508
|
|
496
509
|
# Register vectorized UDF for batch inference
|
497
510
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
498
|
-
safe_id=self.
|
511
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
499
512
|
|
500
513
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
501
514
|
# will try to pickle all of self which fails.
|
@@ -587,7 +600,7 @@ class LabelPropagation(BaseTransformer):
|
|
587
600
|
return transformed_pandas_df.to_dict("records")
|
588
601
|
|
589
602
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
590
|
-
safe_id=self.
|
603
|
+
safe_id=self._get_rand_id()
|
591
604
|
)
|
592
605
|
|
593
606
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -754,11 +767,18 @@ class LabelPropagation(BaseTransformer):
|
|
754
767
|
Transformed dataset.
|
755
768
|
"""
|
756
769
|
if isinstance(dataset, DataFrame):
|
770
|
+
expected_type_inferred = ""
|
771
|
+
# when it is classifier, infer the datatype from label columns
|
772
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
773
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
774
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
775
|
+
)
|
776
|
+
|
757
777
|
output_df = self._batch_inference(
|
758
778
|
dataset=dataset,
|
759
779
|
inference_method="predict",
|
760
780
|
expected_output_cols_list=self.output_cols,
|
761
|
-
expected_output_cols_type=
|
781
|
+
expected_output_cols_type=expected_type_inferred,
|
762
782
|
)
|
763
783
|
elif isinstance(dataset, pd.DataFrame):
|
764
784
|
output_df = self._sklearn_inference(
|
@@ -829,10 +849,10 @@ class LabelPropagation(BaseTransformer):
|
|
829
849
|
|
830
850
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
831
851
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
832
|
-
Returns
|
852
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
833
853
|
"""
|
834
854
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
835
|
-
return []
|
855
|
+
return [output_cols_prefix]
|
836
856
|
|
837
857
|
classes = self._sklearn_object.classes_
|
838
858
|
if isinstance(classes, numpy.ndarray):
|
@@ -1061,7 +1081,7 @@ class LabelPropagation(BaseTransformer):
|
|
1061
1081
|
cp.dump(self._sklearn_object, local_score_file)
|
1062
1082
|
|
1063
1083
|
# Create temp stage to run score.
|
1064
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1084
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1065
1085
|
session = dataset._session
|
1066
1086
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1067
1087
|
SqlResultValidator(
|
@@ -1075,8 +1095,9 @@ class LabelPropagation(BaseTransformer):
|
|
1075
1095
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1076
1096
|
).validate()
|
1077
1097
|
|
1078
|
-
|
1079
|
-
|
1098
|
+
# Use posixpath to construct stage paths
|
1099
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1100
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1080
1101
|
statement_params = telemetry.get_function_usage_statement_params(
|
1081
1102
|
project=_PROJECT,
|
1082
1103
|
subproject=_SUBPROJECT,
|
@@ -1102,6 +1123,7 @@ class LabelPropagation(BaseTransformer):
|
|
1102
1123
|
replace=True,
|
1103
1124
|
session=session,
|
1104
1125
|
statement_params=statement_params,
|
1126
|
+
anonymous=True
|
1105
1127
|
)
|
1106
1128
|
def score_wrapper_sproc(
|
1107
1129
|
session: Session,
|
@@ -1109,7 +1131,8 @@ class LabelPropagation(BaseTransformer):
|
|
1109
1131
|
stage_score_file_name: str,
|
1110
1132
|
input_cols: List[str],
|
1111
1133
|
label_cols: List[str],
|
1112
|
-
sample_weight_col: Optional[str]
|
1134
|
+
sample_weight_col: Optional[str],
|
1135
|
+
statement_params: Dict[str, str]
|
1113
1136
|
) -> float:
|
1114
1137
|
import cloudpickle as cp
|
1115
1138
|
import numpy as np
|
@@ -1159,14 +1182,14 @@ class LabelPropagation(BaseTransformer):
|
|
1159
1182
|
api_calls=[Session.call],
|
1160
1183
|
custom_tags=dict([("autogen", True)]),
|
1161
1184
|
)
|
1162
|
-
score =
|
1163
|
-
|
1185
|
+
score = score_wrapper_sproc(
|
1186
|
+
session,
|
1164
1187
|
query,
|
1165
1188
|
stage_score_file_name,
|
1166
1189
|
identifier.get_unescaped_names(self.input_cols),
|
1167
1190
|
identifier.get_unescaped_names(self.label_cols),
|
1168
1191
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1169
|
-
statement_params
|
1192
|
+
statement_params,
|
1170
1193
|
)
|
1171
1194
|
|
1172
1195
|
cleanup_temp_files([local_score_file_name])
|
@@ -1184,18 +1207,20 @@ class LabelPropagation(BaseTransformer):
|
|
1184
1207
|
if self._sklearn_object._estimator_type == 'classifier':
|
1185
1208
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1186
1209
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1187
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1210
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1211
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1188
1212
|
# For regressor, the type of predict is float64
|
1189
1213
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1190
1214
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1191
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1192
|
-
|
1215
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1216
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1193
1217
|
for prob_func in PROB_FUNCTIONS:
|
1194
1218
|
if hasattr(self, prob_func):
|
1195
1219
|
output_cols_prefix: str = f"{prob_func}_"
|
1196
1220
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1197
1221
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1198
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1222
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1223
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1199
1224
|
|
1200
1225
|
@property
|
1201
1226
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|