snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -273,7 +275,6 @@ class Perceptron(BaseTransformer):
|
|
273
275
|
sample_weight_col: Optional[str] = None,
|
274
276
|
) -> None:
|
275
277
|
super().__init__()
|
276
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
277
278
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
278
279
|
|
279
280
|
self._deps = list(deps)
|
@@ -308,6 +309,15 @@ class Perceptron(BaseTransformer):
|
|
308
309
|
self.set_drop_input_cols(drop_input_cols)
|
309
310
|
self.set_sample_weight_col(sample_weight_col)
|
310
311
|
|
312
|
+
def _get_rand_id(self) -> str:
|
313
|
+
"""
|
314
|
+
Generate random id to be used in sproc and stage names.
|
315
|
+
|
316
|
+
Returns:
|
317
|
+
Random id string usable in sproc, table, and stage names.
|
318
|
+
"""
|
319
|
+
return str(uuid4()).replace("-", "_").upper()
|
320
|
+
|
311
321
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
312
322
|
"""
|
313
323
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -386,7 +396,7 @@ class Perceptron(BaseTransformer):
|
|
386
396
|
cp.dump(self._sklearn_object, local_transform_file)
|
387
397
|
|
388
398
|
# Create temp stage to run fit.
|
389
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
399
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
390
400
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
391
401
|
SqlResultValidator(
|
392
402
|
session=session,
|
@@ -399,11 +409,12 @@ class Perceptron(BaseTransformer):
|
|
399
409
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
400
410
|
).validate()
|
401
411
|
|
402
|
-
|
412
|
+
# Use posixpath to construct stage paths
|
413
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
414
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
403
415
|
local_result_file_name = get_temp_file_path()
|
404
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
405
416
|
|
406
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
417
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
407
418
|
statement_params = telemetry.get_function_usage_statement_params(
|
408
419
|
project=_PROJECT,
|
409
420
|
subproject=_SUBPROJECT,
|
@@ -429,6 +440,7 @@ class Perceptron(BaseTransformer):
|
|
429
440
|
replace=True,
|
430
441
|
session=session,
|
431
442
|
statement_params=statement_params,
|
443
|
+
anonymous=True
|
432
444
|
)
|
433
445
|
def fit_wrapper_sproc(
|
434
446
|
session: Session,
|
@@ -437,7 +449,8 @@ class Perceptron(BaseTransformer):
|
|
437
449
|
stage_result_file_name: str,
|
438
450
|
input_cols: List[str],
|
439
451
|
label_cols: List[str],
|
440
|
-
sample_weight_col: Optional[str]
|
452
|
+
sample_weight_col: Optional[str],
|
453
|
+
statement_params: Dict[str, str]
|
441
454
|
) -> str:
|
442
455
|
import cloudpickle as cp
|
443
456
|
import numpy as np
|
@@ -504,15 +517,15 @@ class Perceptron(BaseTransformer):
|
|
504
517
|
api_calls=[Session.call],
|
505
518
|
custom_tags=dict([("autogen", True)]),
|
506
519
|
)
|
507
|
-
sproc_export_file_name =
|
508
|
-
|
520
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
521
|
+
session,
|
509
522
|
query,
|
510
523
|
stage_transform_file_name,
|
511
524
|
stage_result_file_name,
|
512
525
|
identifier.get_unescaped_names(self.input_cols),
|
513
526
|
identifier.get_unescaped_names(self.label_cols),
|
514
527
|
identifier.get_unescaped_names(self.sample_weight_col),
|
515
|
-
statement_params
|
528
|
+
statement_params,
|
516
529
|
)
|
517
530
|
|
518
531
|
if "|" in sproc_export_file_name:
|
@@ -522,7 +535,7 @@ class Perceptron(BaseTransformer):
|
|
522
535
|
print("\n".join(fields[1:]))
|
523
536
|
|
524
537
|
session.file.get(
|
525
|
-
|
538
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
526
539
|
local_result_file_name,
|
527
540
|
statement_params=statement_params
|
528
541
|
)
|
@@ -568,7 +581,7 @@ class Perceptron(BaseTransformer):
|
|
568
581
|
|
569
582
|
# Register vectorized UDF for batch inference
|
570
583
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
571
|
-
safe_id=self.id, method=inference_method)
|
584
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
572
585
|
|
573
586
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
574
587
|
# will try to pickle all of self which fails.
|
@@ -660,7 +673,7 @@ class Perceptron(BaseTransformer):
|
|
660
673
|
return transformed_pandas_df.to_dict("records")
|
661
674
|
|
662
675
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
663
|
-
safe_id=self.id
|
676
|
+
safe_id=self._get_rand_id()
|
664
677
|
)
|
665
678
|
|
666
679
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -827,11 +840,18 @@ class Perceptron(BaseTransformer):
|
|
827
840
|
Transformed dataset.
|
828
841
|
"""
|
829
842
|
if isinstance(dataset, DataFrame):
|
843
|
+
expected_type_inferred = ""
|
844
|
+
# when it is classifier, infer the datatype from label columns
|
845
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
846
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
847
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
848
|
+
)
|
849
|
+
|
830
850
|
output_df = self._batch_inference(
|
831
851
|
dataset=dataset,
|
832
852
|
inference_method="predict",
|
833
853
|
expected_output_cols_list=self.output_cols,
|
834
|
-
expected_output_cols_type=
|
854
|
+
expected_output_cols_type=expected_type_inferred,
|
835
855
|
)
|
836
856
|
elif isinstance(dataset, pd.DataFrame):
|
837
857
|
output_df = self._sklearn_inference(
|
@@ -902,10 +922,10 @@ class Perceptron(BaseTransformer):
|
|
902
922
|
|
903
923
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
904
924
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
905
|
-
Returns
|
925
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
906
926
|
"""
|
907
927
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
908
|
-
return []
|
928
|
+
return [output_cols_prefix]
|
909
929
|
|
910
930
|
classes = self._sklearn_object.classes_
|
911
931
|
if isinstance(classes, numpy.ndarray):
|
@@ -1132,7 +1152,7 @@ class Perceptron(BaseTransformer):
|
|
1132
1152
|
cp.dump(self._sklearn_object, local_score_file)
|
1133
1153
|
|
1134
1154
|
# Create temp stage to run score.
|
1135
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1155
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1136
1156
|
session = dataset._session
|
1137
1157
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1138
1158
|
SqlResultValidator(
|
@@ -1146,8 +1166,9 @@ class Perceptron(BaseTransformer):
|
|
1146
1166
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1147
1167
|
).validate()
|
1148
1168
|
|
1149
|
-
|
1150
|
-
|
1169
|
+
# Use posixpath to construct stage paths
|
1170
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1171
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1151
1172
|
statement_params = telemetry.get_function_usage_statement_params(
|
1152
1173
|
project=_PROJECT,
|
1153
1174
|
subproject=_SUBPROJECT,
|
@@ -1173,6 +1194,7 @@ class Perceptron(BaseTransformer):
|
|
1173
1194
|
replace=True,
|
1174
1195
|
session=session,
|
1175
1196
|
statement_params=statement_params,
|
1197
|
+
anonymous=True
|
1176
1198
|
)
|
1177
1199
|
def score_wrapper_sproc(
|
1178
1200
|
session: Session,
|
@@ -1180,7 +1202,8 @@ class Perceptron(BaseTransformer):
|
|
1180
1202
|
stage_score_file_name: str,
|
1181
1203
|
input_cols: List[str],
|
1182
1204
|
label_cols: List[str],
|
1183
|
-
sample_weight_col: Optional[str]
|
1205
|
+
sample_weight_col: Optional[str],
|
1206
|
+
statement_params: Dict[str, str]
|
1184
1207
|
) -> float:
|
1185
1208
|
import cloudpickle as cp
|
1186
1209
|
import numpy as np
|
@@ -1230,14 +1253,14 @@ class Perceptron(BaseTransformer):
|
|
1230
1253
|
api_calls=[Session.call],
|
1231
1254
|
custom_tags=dict([("autogen", True)]),
|
1232
1255
|
)
|
1233
|
-
score =
|
1234
|
-
|
1256
|
+
score = score_wrapper_sproc(
|
1257
|
+
session,
|
1235
1258
|
query,
|
1236
1259
|
stage_score_file_name,
|
1237
1260
|
identifier.get_unescaped_names(self.input_cols),
|
1238
1261
|
identifier.get_unescaped_names(self.label_cols),
|
1239
1262
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1240
|
-
statement_params
|
1263
|
+
statement_params,
|
1241
1264
|
)
|
1242
1265
|
|
1243
1266
|
cleanup_temp_files([local_score_file_name])
|
@@ -1255,18 +1278,20 @@ class Perceptron(BaseTransformer):
|
|
1255
1278
|
if self._sklearn_object._estimator_type == 'classifier':
|
1256
1279
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1257
1280
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1258
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1281
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1282
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1259
1283
|
# For regressor, the type of predict is float64
|
1260
1284
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1261
1285
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1262
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1263
|
-
|
1286
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1287
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1264
1288
|
for prob_func in PROB_FUNCTIONS:
|
1265
1289
|
if hasattr(self, prob_func):
|
1266
1290
|
output_cols_prefix: str = f"{prob_func}_"
|
1267
1291
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1268
1292
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1269
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1293
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1294
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1270
1295
|
|
1271
1296
|
@property
|
1272
1297
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -230,7 +232,6 @@ class PoissonRegressor(BaseTransformer):
|
|
230
232
|
sample_weight_col: Optional[str] = None,
|
231
233
|
) -> None:
|
232
234
|
super().__init__()
|
233
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
234
235
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
235
236
|
|
236
237
|
self._deps = list(deps)
|
@@ -256,6 +257,15 @@ class PoissonRegressor(BaseTransformer):
|
|
256
257
|
self.set_drop_input_cols(drop_input_cols)
|
257
258
|
self.set_sample_weight_col(sample_weight_col)
|
258
259
|
|
260
|
+
def _get_rand_id(self) -> str:
|
261
|
+
"""
|
262
|
+
Generate random id to be used in sproc and stage names.
|
263
|
+
|
264
|
+
Returns:
|
265
|
+
Random id string usable in sproc, table, and stage names.
|
266
|
+
"""
|
267
|
+
return str(uuid4()).replace("-", "_").upper()
|
268
|
+
|
259
269
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
260
270
|
"""
|
261
271
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -334,7 +344,7 @@ class PoissonRegressor(BaseTransformer):
|
|
334
344
|
cp.dump(self._sklearn_object, local_transform_file)
|
335
345
|
|
336
346
|
# Create temp stage to run fit.
|
337
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
347
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
338
348
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
339
349
|
SqlResultValidator(
|
340
350
|
session=session,
|
@@ -347,11 +357,12 @@ class PoissonRegressor(BaseTransformer):
|
|
347
357
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
348
358
|
).validate()
|
349
359
|
|
350
|
-
|
360
|
+
# Use posixpath to construct stage paths
|
361
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
362
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
351
363
|
local_result_file_name = get_temp_file_path()
|
352
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
353
364
|
|
354
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
365
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
355
366
|
statement_params = telemetry.get_function_usage_statement_params(
|
356
367
|
project=_PROJECT,
|
357
368
|
subproject=_SUBPROJECT,
|
@@ -377,6 +388,7 @@ class PoissonRegressor(BaseTransformer):
|
|
377
388
|
replace=True,
|
378
389
|
session=session,
|
379
390
|
statement_params=statement_params,
|
391
|
+
anonymous=True
|
380
392
|
)
|
381
393
|
def fit_wrapper_sproc(
|
382
394
|
session: Session,
|
@@ -385,7 +397,8 @@ class PoissonRegressor(BaseTransformer):
|
|
385
397
|
stage_result_file_name: str,
|
386
398
|
input_cols: List[str],
|
387
399
|
label_cols: List[str],
|
388
|
-
sample_weight_col: Optional[str]
|
400
|
+
sample_weight_col: Optional[str],
|
401
|
+
statement_params: Dict[str, str]
|
389
402
|
) -> str:
|
390
403
|
import cloudpickle as cp
|
391
404
|
import numpy as np
|
@@ -452,15 +465,15 @@ class PoissonRegressor(BaseTransformer):
|
|
452
465
|
api_calls=[Session.call],
|
453
466
|
custom_tags=dict([("autogen", True)]),
|
454
467
|
)
|
455
|
-
sproc_export_file_name =
|
456
|
-
|
468
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
469
|
+
session,
|
457
470
|
query,
|
458
471
|
stage_transform_file_name,
|
459
472
|
stage_result_file_name,
|
460
473
|
identifier.get_unescaped_names(self.input_cols),
|
461
474
|
identifier.get_unescaped_names(self.label_cols),
|
462
475
|
identifier.get_unescaped_names(self.sample_weight_col),
|
463
|
-
statement_params
|
476
|
+
statement_params,
|
464
477
|
)
|
465
478
|
|
466
479
|
if "|" in sproc_export_file_name:
|
@@ -470,7 +483,7 @@ class PoissonRegressor(BaseTransformer):
|
|
470
483
|
print("\n".join(fields[1:]))
|
471
484
|
|
472
485
|
session.file.get(
|
473
|
-
|
486
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
474
487
|
local_result_file_name,
|
475
488
|
statement_params=statement_params
|
476
489
|
)
|
@@ -516,7 +529,7 @@ class PoissonRegressor(BaseTransformer):
|
|
516
529
|
|
517
530
|
# Register vectorized UDF for batch inference
|
518
531
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
519
|
-
safe_id=self.id, method=inference_method)
|
532
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
520
533
|
|
521
534
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
522
535
|
# will try to pickle all of self which fails.
|
@@ -608,7 +621,7 @@ class PoissonRegressor(BaseTransformer):
|
|
608
621
|
return transformed_pandas_df.to_dict("records")
|
609
622
|
|
610
623
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
611
|
-
safe_id=self.id
|
624
|
+
safe_id=self._get_rand_id()
|
612
625
|
)
|
613
626
|
|
614
627
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -775,11 +788,18 @@ class PoissonRegressor(BaseTransformer):
|
|
775
788
|
Transformed dataset.
|
776
789
|
"""
|
777
790
|
if isinstance(dataset, DataFrame):
|
791
|
+
expected_type_inferred = "float"
|
792
|
+
# when it is classifier, infer the datatype from label columns
|
793
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
794
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
795
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
796
|
+
)
|
797
|
+
|
778
798
|
output_df = self._batch_inference(
|
779
799
|
dataset=dataset,
|
780
800
|
inference_method="predict",
|
781
801
|
expected_output_cols_list=self.output_cols,
|
782
|
-
expected_output_cols_type=
|
802
|
+
expected_output_cols_type=expected_type_inferred,
|
783
803
|
)
|
784
804
|
elif isinstance(dataset, pd.DataFrame):
|
785
805
|
output_df = self._sklearn_inference(
|
@@ -850,10 +870,10 @@ class PoissonRegressor(BaseTransformer):
|
|
850
870
|
|
851
871
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
852
872
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
853
|
-
Returns
|
873
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
854
874
|
"""
|
855
875
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
856
|
-
return []
|
876
|
+
return [output_cols_prefix]
|
857
877
|
|
858
878
|
classes = self._sklearn_object.classes_
|
859
879
|
if isinstance(classes, numpy.ndarray):
|
@@ -1078,7 +1098,7 @@ class PoissonRegressor(BaseTransformer):
|
|
1078
1098
|
cp.dump(self._sklearn_object, local_score_file)
|
1079
1099
|
|
1080
1100
|
# Create temp stage to run score.
|
1081
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1101
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1082
1102
|
session = dataset._session
|
1083
1103
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1084
1104
|
SqlResultValidator(
|
@@ -1092,8 +1112,9 @@ class PoissonRegressor(BaseTransformer):
|
|
1092
1112
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1093
1113
|
).validate()
|
1094
1114
|
|
1095
|
-
|
1096
|
-
|
1115
|
+
# Use posixpath to construct stage paths
|
1116
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1117
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1097
1118
|
statement_params = telemetry.get_function_usage_statement_params(
|
1098
1119
|
project=_PROJECT,
|
1099
1120
|
subproject=_SUBPROJECT,
|
@@ -1119,6 +1140,7 @@ class PoissonRegressor(BaseTransformer):
|
|
1119
1140
|
replace=True,
|
1120
1141
|
session=session,
|
1121
1142
|
statement_params=statement_params,
|
1143
|
+
anonymous=True
|
1122
1144
|
)
|
1123
1145
|
def score_wrapper_sproc(
|
1124
1146
|
session: Session,
|
@@ -1126,7 +1148,8 @@ class PoissonRegressor(BaseTransformer):
|
|
1126
1148
|
stage_score_file_name: str,
|
1127
1149
|
input_cols: List[str],
|
1128
1150
|
label_cols: List[str],
|
1129
|
-
sample_weight_col: Optional[str]
|
1151
|
+
sample_weight_col: Optional[str],
|
1152
|
+
statement_params: Dict[str, str]
|
1130
1153
|
) -> float:
|
1131
1154
|
import cloudpickle as cp
|
1132
1155
|
import numpy as np
|
@@ -1176,14 +1199,14 @@ class PoissonRegressor(BaseTransformer):
|
|
1176
1199
|
api_calls=[Session.call],
|
1177
1200
|
custom_tags=dict([("autogen", True)]),
|
1178
1201
|
)
|
1179
|
-
score =
|
1180
|
-
|
1202
|
+
score = score_wrapper_sproc(
|
1203
|
+
session,
|
1181
1204
|
query,
|
1182
1205
|
stage_score_file_name,
|
1183
1206
|
identifier.get_unescaped_names(self.input_cols),
|
1184
1207
|
identifier.get_unescaped_names(self.label_cols),
|
1185
1208
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1186
|
-
statement_params
|
1209
|
+
statement_params,
|
1187
1210
|
)
|
1188
1211
|
|
1189
1212
|
cleanup_temp_files([local_score_file_name])
|
@@ -1201,18 +1224,20 @@ class PoissonRegressor(BaseTransformer):
|
|
1201
1224
|
if self._sklearn_object._estimator_type == 'classifier':
|
1202
1225
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1203
1226
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1204
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1227
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1228
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1205
1229
|
# For regressor, the type of predict is float64
|
1206
1230
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1207
1231
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1208
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1209
|
-
|
1232
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1233
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1210
1234
|
for prob_func in PROB_FUNCTIONS:
|
1211
1235
|
if hasattr(self, prob_func):
|
1212
1236
|
output_cols_prefix: str = f"{prob_func}_"
|
1213
1237
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1214
1238
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1215
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1239
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1240
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1216
1241
|
|
1217
1242
|
@property
|
1218
1243
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|