snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -28,6 +29,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
28
29
|
from snowflake.snowpark import DataFrame, Session
|
29
30
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
30
31
|
from snowflake.snowpark.types import PandasSeries
|
32
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
31
33
|
|
32
34
|
from snowflake.ml.model.model_signature import (
|
33
35
|
DataType,
|
@@ -290,7 +292,6 @@ class IterativeImputer(BaseTransformer):
|
|
290
292
|
sample_weight_col: Optional[str] = None,
|
291
293
|
) -> None:
|
292
294
|
super().__init__()
|
293
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
294
295
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
295
296
|
deps = deps | _gather_dependencies(estimator)
|
296
297
|
self._deps = list(deps)
|
@@ -324,6 +325,15 @@ class IterativeImputer(BaseTransformer):
|
|
324
325
|
self.set_drop_input_cols(drop_input_cols)
|
325
326
|
self.set_sample_weight_col(sample_weight_col)
|
326
327
|
|
328
|
+
def _get_rand_id(self) -> str:
|
329
|
+
"""
|
330
|
+
Generate random id to be used in sproc and stage names.
|
331
|
+
|
332
|
+
Returns:
|
333
|
+
Random id string usable in sproc, table, and stage names.
|
334
|
+
"""
|
335
|
+
return str(uuid4()).replace("-", "_").upper()
|
336
|
+
|
327
337
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
328
338
|
"""
|
329
339
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -402,7 +412,7 @@ class IterativeImputer(BaseTransformer):
|
|
402
412
|
cp.dump(self._sklearn_object, local_transform_file)
|
403
413
|
|
404
414
|
# Create temp stage to run fit.
|
405
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
415
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
406
416
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
407
417
|
SqlResultValidator(
|
408
418
|
session=session,
|
@@ -415,11 +425,12 @@ class IterativeImputer(BaseTransformer):
|
|
415
425
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
416
426
|
).validate()
|
417
427
|
|
418
|
-
|
428
|
+
# Use posixpath to construct stage paths
|
429
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
430
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
419
431
|
local_result_file_name = get_temp_file_path()
|
420
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
421
432
|
|
422
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
433
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
423
434
|
statement_params = telemetry.get_function_usage_statement_params(
|
424
435
|
project=_PROJECT,
|
425
436
|
subproject=_SUBPROJECT,
|
@@ -445,6 +456,7 @@ class IterativeImputer(BaseTransformer):
|
|
445
456
|
replace=True,
|
446
457
|
session=session,
|
447
458
|
statement_params=statement_params,
|
459
|
+
anonymous=True
|
448
460
|
)
|
449
461
|
def fit_wrapper_sproc(
|
450
462
|
session: Session,
|
@@ -453,7 +465,8 @@ class IterativeImputer(BaseTransformer):
|
|
453
465
|
stage_result_file_name: str,
|
454
466
|
input_cols: List[str],
|
455
467
|
label_cols: List[str],
|
456
|
-
sample_weight_col: Optional[str]
|
468
|
+
sample_weight_col: Optional[str],
|
469
|
+
statement_params: Dict[str, str]
|
457
470
|
) -> str:
|
458
471
|
import cloudpickle as cp
|
459
472
|
import numpy as np
|
@@ -520,15 +533,15 @@ class IterativeImputer(BaseTransformer):
|
|
520
533
|
api_calls=[Session.call],
|
521
534
|
custom_tags=dict([("autogen", True)]),
|
522
535
|
)
|
523
|
-
sproc_export_file_name =
|
524
|
-
|
536
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
537
|
+
session,
|
525
538
|
query,
|
526
539
|
stage_transform_file_name,
|
527
540
|
stage_result_file_name,
|
528
541
|
identifier.get_unescaped_names(self.input_cols),
|
529
542
|
identifier.get_unescaped_names(self.label_cols),
|
530
543
|
identifier.get_unescaped_names(self.sample_weight_col),
|
531
|
-
statement_params
|
544
|
+
statement_params,
|
532
545
|
)
|
533
546
|
|
534
547
|
if "|" in sproc_export_file_name:
|
@@ -538,7 +551,7 @@ class IterativeImputer(BaseTransformer):
|
|
538
551
|
print("\n".join(fields[1:]))
|
539
552
|
|
540
553
|
session.file.get(
|
541
|
-
|
554
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
542
555
|
local_result_file_name,
|
543
556
|
statement_params=statement_params
|
544
557
|
)
|
@@ -584,7 +597,7 @@ class IterativeImputer(BaseTransformer):
|
|
584
597
|
|
585
598
|
# Register vectorized UDF for batch inference
|
586
599
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
587
|
-
safe_id=self.
|
600
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
588
601
|
|
589
602
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
590
603
|
# will try to pickle all of self which fails.
|
@@ -676,7 +689,7 @@ class IterativeImputer(BaseTransformer):
|
|
676
689
|
return transformed_pandas_df.to_dict("records")
|
677
690
|
|
678
691
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
679
|
-
safe_id=self.
|
692
|
+
safe_id=self._get_rand_id()
|
680
693
|
)
|
681
694
|
|
682
695
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -841,11 +854,18 @@ class IterativeImputer(BaseTransformer):
|
|
841
854
|
Transformed dataset.
|
842
855
|
"""
|
843
856
|
if isinstance(dataset, DataFrame):
|
857
|
+
expected_type_inferred = ""
|
858
|
+
# when it is classifier, infer the datatype from label columns
|
859
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
860
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
861
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
862
|
+
)
|
863
|
+
|
844
864
|
output_df = self._batch_inference(
|
845
865
|
dataset=dataset,
|
846
866
|
inference_method="predict",
|
847
867
|
expected_output_cols_list=self.output_cols,
|
848
|
-
expected_output_cols_type=
|
868
|
+
expected_output_cols_type=expected_type_inferred,
|
849
869
|
)
|
850
870
|
elif isinstance(dataset, pd.DataFrame):
|
851
871
|
output_df = self._sklearn_inference(
|
@@ -918,10 +938,10 @@ class IterativeImputer(BaseTransformer):
|
|
918
938
|
|
919
939
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
920
940
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
921
|
-
Returns
|
941
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
922
942
|
"""
|
923
943
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
924
|
-
return []
|
944
|
+
return [output_cols_prefix]
|
925
945
|
|
926
946
|
classes = self._sklearn_object.classes_
|
927
947
|
if isinstance(classes, numpy.ndarray):
|
@@ -1146,7 +1166,7 @@ class IterativeImputer(BaseTransformer):
|
|
1146
1166
|
cp.dump(self._sklearn_object, local_score_file)
|
1147
1167
|
|
1148
1168
|
# Create temp stage to run score.
|
1149
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1169
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1150
1170
|
session = dataset._session
|
1151
1171
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1152
1172
|
SqlResultValidator(
|
@@ -1160,8 +1180,9 @@ class IterativeImputer(BaseTransformer):
|
|
1160
1180
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1161
1181
|
).validate()
|
1162
1182
|
|
1163
|
-
|
1164
|
-
|
1183
|
+
# Use posixpath to construct stage paths
|
1184
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1185
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1165
1186
|
statement_params = telemetry.get_function_usage_statement_params(
|
1166
1187
|
project=_PROJECT,
|
1167
1188
|
subproject=_SUBPROJECT,
|
@@ -1187,6 +1208,7 @@ class IterativeImputer(BaseTransformer):
|
|
1187
1208
|
replace=True,
|
1188
1209
|
session=session,
|
1189
1210
|
statement_params=statement_params,
|
1211
|
+
anonymous=True
|
1190
1212
|
)
|
1191
1213
|
def score_wrapper_sproc(
|
1192
1214
|
session: Session,
|
@@ -1194,7 +1216,8 @@ class IterativeImputer(BaseTransformer):
|
|
1194
1216
|
stage_score_file_name: str,
|
1195
1217
|
input_cols: List[str],
|
1196
1218
|
label_cols: List[str],
|
1197
|
-
sample_weight_col: Optional[str]
|
1219
|
+
sample_weight_col: Optional[str],
|
1220
|
+
statement_params: Dict[str, str]
|
1198
1221
|
) -> float:
|
1199
1222
|
import cloudpickle as cp
|
1200
1223
|
import numpy as np
|
@@ -1244,14 +1267,14 @@ class IterativeImputer(BaseTransformer):
|
|
1244
1267
|
api_calls=[Session.call],
|
1245
1268
|
custom_tags=dict([("autogen", True)]),
|
1246
1269
|
)
|
1247
|
-
score =
|
1248
|
-
|
1270
|
+
score = score_wrapper_sproc(
|
1271
|
+
session,
|
1249
1272
|
query,
|
1250
1273
|
stage_score_file_name,
|
1251
1274
|
identifier.get_unescaped_names(self.input_cols),
|
1252
1275
|
identifier.get_unescaped_names(self.label_cols),
|
1253
1276
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1254
|
-
statement_params
|
1277
|
+
statement_params,
|
1255
1278
|
)
|
1256
1279
|
|
1257
1280
|
cleanup_temp_files([local_score_file_name])
|
@@ -1269,18 +1292,20 @@ class IterativeImputer(BaseTransformer):
|
|
1269
1292
|
if self._sklearn_object._estimator_type == 'classifier':
|
1270
1293
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1271
1294
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1272
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1295
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1296
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1273
1297
|
# For regressor, the type of predict is float64
|
1274
1298
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1275
1299
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1276
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1277
|
-
|
1300
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1301
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1278
1302
|
for prob_func in PROB_FUNCTIONS:
|
1279
1303
|
if hasattr(self, prob_func):
|
1280
1304
|
output_cols_prefix: str = f"{prob_func}_"
|
1281
1305
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1282
1306
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1283
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1307
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1308
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1284
1309
|
|
1285
1310
|
@property
|
1286
1311
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -233,7 +235,6 @@ class KNNImputer(BaseTransformer):
|
|
233
235
|
sample_weight_col: Optional[str] = None,
|
234
236
|
) -> None:
|
235
237
|
super().__init__()
|
236
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
237
238
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
238
239
|
|
239
240
|
self._deps = list(deps)
|
@@ -259,6 +260,15 @@ class KNNImputer(BaseTransformer):
|
|
259
260
|
self.set_drop_input_cols(drop_input_cols)
|
260
261
|
self.set_sample_weight_col(sample_weight_col)
|
261
262
|
|
263
|
+
def _get_rand_id(self) -> str:
|
264
|
+
"""
|
265
|
+
Generate random id to be used in sproc and stage names.
|
266
|
+
|
267
|
+
Returns:
|
268
|
+
Random id string usable in sproc, table, and stage names.
|
269
|
+
"""
|
270
|
+
return str(uuid4()).replace("-", "_").upper()
|
271
|
+
|
262
272
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
263
273
|
"""
|
264
274
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -337,7 +347,7 @@ class KNNImputer(BaseTransformer):
|
|
337
347
|
cp.dump(self._sklearn_object, local_transform_file)
|
338
348
|
|
339
349
|
# Create temp stage to run fit.
|
340
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
350
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
341
351
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
342
352
|
SqlResultValidator(
|
343
353
|
session=session,
|
@@ -350,11 +360,12 @@ class KNNImputer(BaseTransformer):
|
|
350
360
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
351
361
|
).validate()
|
352
362
|
|
353
|
-
|
363
|
+
# Use posixpath to construct stage paths
|
364
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
365
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
354
366
|
local_result_file_name = get_temp_file_path()
|
355
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
356
367
|
|
357
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
368
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
358
369
|
statement_params = telemetry.get_function_usage_statement_params(
|
359
370
|
project=_PROJECT,
|
360
371
|
subproject=_SUBPROJECT,
|
@@ -380,6 +391,7 @@ class KNNImputer(BaseTransformer):
|
|
380
391
|
replace=True,
|
381
392
|
session=session,
|
382
393
|
statement_params=statement_params,
|
394
|
+
anonymous=True
|
383
395
|
)
|
384
396
|
def fit_wrapper_sproc(
|
385
397
|
session: Session,
|
@@ -388,7 +400,8 @@ class KNNImputer(BaseTransformer):
|
|
388
400
|
stage_result_file_name: str,
|
389
401
|
input_cols: List[str],
|
390
402
|
label_cols: List[str],
|
391
|
-
sample_weight_col: Optional[str]
|
403
|
+
sample_weight_col: Optional[str],
|
404
|
+
statement_params: Dict[str, str]
|
392
405
|
) -> str:
|
393
406
|
import cloudpickle as cp
|
394
407
|
import numpy as np
|
@@ -455,15 +468,15 @@ class KNNImputer(BaseTransformer):
|
|
455
468
|
api_calls=[Session.call],
|
456
469
|
custom_tags=dict([("autogen", True)]),
|
457
470
|
)
|
458
|
-
sproc_export_file_name =
|
459
|
-
|
471
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
472
|
+
session,
|
460
473
|
query,
|
461
474
|
stage_transform_file_name,
|
462
475
|
stage_result_file_name,
|
463
476
|
identifier.get_unescaped_names(self.input_cols),
|
464
477
|
identifier.get_unescaped_names(self.label_cols),
|
465
478
|
identifier.get_unescaped_names(self.sample_weight_col),
|
466
|
-
statement_params
|
479
|
+
statement_params,
|
467
480
|
)
|
468
481
|
|
469
482
|
if "|" in sproc_export_file_name:
|
@@ -473,7 +486,7 @@ class KNNImputer(BaseTransformer):
|
|
473
486
|
print("\n".join(fields[1:]))
|
474
487
|
|
475
488
|
session.file.get(
|
476
|
-
|
489
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
477
490
|
local_result_file_name,
|
478
491
|
statement_params=statement_params
|
479
492
|
)
|
@@ -519,7 +532,7 @@ class KNNImputer(BaseTransformer):
|
|
519
532
|
|
520
533
|
# Register vectorized UDF for batch inference
|
521
534
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
522
|
-
safe_id=self.
|
535
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
523
536
|
|
524
537
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
525
538
|
# will try to pickle all of self which fails.
|
@@ -611,7 +624,7 @@ class KNNImputer(BaseTransformer):
|
|
611
624
|
return transformed_pandas_df.to_dict("records")
|
612
625
|
|
613
626
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
614
|
-
safe_id=self.
|
627
|
+
safe_id=self._get_rand_id()
|
615
628
|
)
|
616
629
|
|
617
630
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -776,11 +789,18 @@ class KNNImputer(BaseTransformer):
|
|
776
789
|
Transformed dataset.
|
777
790
|
"""
|
778
791
|
if isinstance(dataset, DataFrame):
|
792
|
+
expected_type_inferred = ""
|
793
|
+
# when it is classifier, infer the datatype from label columns
|
794
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
795
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
796
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
797
|
+
)
|
798
|
+
|
779
799
|
output_df = self._batch_inference(
|
780
800
|
dataset=dataset,
|
781
801
|
inference_method="predict",
|
782
802
|
expected_output_cols_list=self.output_cols,
|
783
|
-
expected_output_cols_type=
|
803
|
+
expected_output_cols_type=expected_type_inferred,
|
784
804
|
)
|
785
805
|
elif isinstance(dataset, pd.DataFrame):
|
786
806
|
output_df = self._sklearn_inference(
|
@@ -853,10 +873,10 @@ class KNNImputer(BaseTransformer):
|
|
853
873
|
|
854
874
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
855
875
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
856
|
-
Returns
|
876
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
857
877
|
"""
|
858
878
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
859
|
-
return []
|
879
|
+
return [output_cols_prefix]
|
860
880
|
|
861
881
|
classes = self._sklearn_object.classes_
|
862
882
|
if isinstance(classes, numpy.ndarray):
|
@@ -1081,7 +1101,7 @@ class KNNImputer(BaseTransformer):
|
|
1081
1101
|
cp.dump(self._sklearn_object, local_score_file)
|
1082
1102
|
|
1083
1103
|
# Create temp stage to run score.
|
1084
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1104
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1085
1105
|
session = dataset._session
|
1086
1106
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1087
1107
|
SqlResultValidator(
|
@@ -1095,8 +1115,9 @@ class KNNImputer(BaseTransformer):
|
|
1095
1115
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1096
1116
|
).validate()
|
1097
1117
|
|
1098
|
-
|
1099
|
-
|
1118
|
+
# Use posixpath to construct stage paths
|
1119
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1120
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1100
1121
|
statement_params = telemetry.get_function_usage_statement_params(
|
1101
1122
|
project=_PROJECT,
|
1102
1123
|
subproject=_SUBPROJECT,
|
@@ -1122,6 +1143,7 @@ class KNNImputer(BaseTransformer):
|
|
1122
1143
|
replace=True,
|
1123
1144
|
session=session,
|
1124
1145
|
statement_params=statement_params,
|
1146
|
+
anonymous=True
|
1125
1147
|
)
|
1126
1148
|
def score_wrapper_sproc(
|
1127
1149
|
session: Session,
|
@@ -1129,7 +1151,8 @@ class KNNImputer(BaseTransformer):
|
|
1129
1151
|
stage_score_file_name: str,
|
1130
1152
|
input_cols: List[str],
|
1131
1153
|
label_cols: List[str],
|
1132
|
-
sample_weight_col: Optional[str]
|
1154
|
+
sample_weight_col: Optional[str],
|
1155
|
+
statement_params: Dict[str, str]
|
1133
1156
|
) -> float:
|
1134
1157
|
import cloudpickle as cp
|
1135
1158
|
import numpy as np
|
@@ -1179,14 +1202,14 @@ class KNNImputer(BaseTransformer):
|
|
1179
1202
|
api_calls=[Session.call],
|
1180
1203
|
custom_tags=dict([("autogen", True)]),
|
1181
1204
|
)
|
1182
|
-
score =
|
1183
|
-
|
1205
|
+
score = score_wrapper_sproc(
|
1206
|
+
session,
|
1184
1207
|
query,
|
1185
1208
|
stage_score_file_name,
|
1186
1209
|
identifier.get_unescaped_names(self.input_cols),
|
1187
1210
|
identifier.get_unescaped_names(self.label_cols),
|
1188
1211
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1189
|
-
statement_params
|
1212
|
+
statement_params,
|
1190
1213
|
)
|
1191
1214
|
|
1192
1215
|
cleanup_temp_files([local_score_file_name])
|
@@ -1204,18 +1227,20 @@ class KNNImputer(BaseTransformer):
|
|
1204
1227
|
if self._sklearn_object._estimator_type == 'classifier':
|
1205
1228
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1206
1229
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1207
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1230
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1231
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1208
1232
|
# For regressor, the type of predict is float64
|
1209
1233
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1210
1234
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1211
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1212
|
-
|
1235
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1236
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1213
1237
|
for prob_func in PROB_FUNCTIONS:
|
1214
1238
|
if hasattr(self, prob_func):
|
1215
1239
|
output_cols_prefix: str = f"{prob_func}_"
|
1216
1240
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1217
1241
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1218
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1242
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1243
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1219
1244
|
|
1220
1245
|
@property
|
1221
1246
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|