snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -235,7 +237,6 @@ class TheilSenRegressor(BaseTransformer):
|
|
235
237
|
sample_weight_col: Optional[str] = None,
|
236
238
|
) -> None:
|
237
239
|
super().__init__()
|
238
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
239
240
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
240
241
|
|
241
242
|
self._deps = list(deps)
|
@@ -263,6 +264,15 @@ class TheilSenRegressor(BaseTransformer):
|
|
263
264
|
self.set_drop_input_cols(drop_input_cols)
|
264
265
|
self.set_sample_weight_col(sample_weight_col)
|
265
266
|
|
267
|
+
def _get_rand_id(self) -> str:
|
268
|
+
"""
|
269
|
+
Generate random id to be used in sproc and stage names.
|
270
|
+
|
271
|
+
Returns:
|
272
|
+
Random id string usable in sproc, table, and stage names.
|
273
|
+
"""
|
274
|
+
return str(uuid4()).replace("-", "_").upper()
|
275
|
+
|
266
276
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
267
277
|
"""
|
268
278
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -341,7 +351,7 @@ class TheilSenRegressor(BaseTransformer):
|
|
341
351
|
cp.dump(self._sklearn_object, local_transform_file)
|
342
352
|
|
343
353
|
# Create temp stage to run fit.
|
344
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
354
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
345
355
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
346
356
|
SqlResultValidator(
|
347
357
|
session=session,
|
@@ -354,11 +364,12 @@ class TheilSenRegressor(BaseTransformer):
|
|
354
364
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
355
365
|
).validate()
|
356
366
|
|
357
|
-
|
367
|
+
# Use posixpath to construct stage paths
|
368
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
369
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
358
370
|
local_result_file_name = get_temp_file_path()
|
359
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
360
371
|
|
361
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
372
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
362
373
|
statement_params = telemetry.get_function_usage_statement_params(
|
363
374
|
project=_PROJECT,
|
364
375
|
subproject=_SUBPROJECT,
|
@@ -384,6 +395,7 @@ class TheilSenRegressor(BaseTransformer):
|
|
384
395
|
replace=True,
|
385
396
|
session=session,
|
386
397
|
statement_params=statement_params,
|
398
|
+
anonymous=True
|
387
399
|
)
|
388
400
|
def fit_wrapper_sproc(
|
389
401
|
session: Session,
|
@@ -392,7 +404,8 @@ class TheilSenRegressor(BaseTransformer):
|
|
392
404
|
stage_result_file_name: str,
|
393
405
|
input_cols: List[str],
|
394
406
|
label_cols: List[str],
|
395
|
-
sample_weight_col: Optional[str]
|
407
|
+
sample_weight_col: Optional[str],
|
408
|
+
statement_params: Dict[str, str]
|
396
409
|
) -> str:
|
397
410
|
import cloudpickle as cp
|
398
411
|
import numpy as np
|
@@ -459,15 +472,15 @@ class TheilSenRegressor(BaseTransformer):
|
|
459
472
|
api_calls=[Session.call],
|
460
473
|
custom_tags=dict([("autogen", True)]),
|
461
474
|
)
|
462
|
-
sproc_export_file_name =
|
463
|
-
|
475
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
476
|
+
session,
|
464
477
|
query,
|
465
478
|
stage_transform_file_name,
|
466
479
|
stage_result_file_name,
|
467
480
|
identifier.get_unescaped_names(self.input_cols),
|
468
481
|
identifier.get_unescaped_names(self.label_cols),
|
469
482
|
identifier.get_unescaped_names(self.sample_weight_col),
|
470
|
-
statement_params
|
483
|
+
statement_params,
|
471
484
|
)
|
472
485
|
|
473
486
|
if "|" in sproc_export_file_name:
|
@@ -477,7 +490,7 @@ class TheilSenRegressor(BaseTransformer):
|
|
477
490
|
print("\n".join(fields[1:]))
|
478
491
|
|
479
492
|
session.file.get(
|
480
|
-
|
493
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
481
494
|
local_result_file_name,
|
482
495
|
statement_params=statement_params
|
483
496
|
)
|
@@ -523,7 +536,7 @@ class TheilSenRegressor(BaseTransformer):
|
|
523
536
|
|
524
537
|
# Register vectorized UDF for batch inference
|
525
538
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
526
|
-
safe_id=self.
|
539
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
527
540
|
|
528
541
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
529
542
|
# will try to pickle all of self which fails.
|
@@ -615,7 +628,7 @@ class TheilSenRegressor(BaseTransformer):
|
|
615
628
|
return transformed_pandas_df.to_dict("records")
|
616
629
|
|
617
630
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
618
|
-
safe_id=self.
|
631
|
+
safe_id=self._get_rand_id()
|
619
632
|
)
|
620
633
|
|
621
634
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -782,11 +795,18 @@ class TheilSenRegressor(BaseTransformer):
|
|
782
795
|
Transformed dataset.
|
783
796
|
"""
|
784
797
|
if isinstance(dataset, DataFrame):
|
798
|
+
expected_type_inferred = "float"
|
799
|
+
# when it is classifier, infer the datatype from label columns
|
800
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
801
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
802
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
803
|
+
)
|
804
|
+
|
785
805
|
output_df = self._batch_inference(
|
786
806
|
dataset=dataset,
|
787
807
|
inference_method="predict",
|
788
808
|
expected_output_cols_list=self.output_cols,
|
789
|
-
expected_output_cols_type=
|
809
|
+
expected_output_cols_type=expected_type_inferred,
|
790
810
|
)
|
791
811
|
elif isinstance(dataset, pd.DataFrame):
|
792
812
|
output_df = self._sklearn_inference(
|
@@ -857,10 +877,10 @@ class TheilSenRegressor(BaseTransformer):
|
|
857
877
|
|
858
878
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
859
879
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
860
|
-
Returns
|
880
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
861
881
|
"""
|
862
882
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
863
|
-
return []
|
883
|
+
return [output_cols_prefix]
|
864
884
|
|
865
885
|
classes = self._sklearn_object.classes_
|
866
886
|
if isinstance(classes, numpy.ndarray):
|
@@ -1085,7 +1105,7 @@ class TheilSenRegressor(BaseTransformer):
|
|
1085
1105
|
cp.dump(self._sklearn_object, local_score_file)
|
1086
1106
|
|
1087
1107
|
# Create temp stage to run score.
|
1088
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1108
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1089
1109
|
session = dataset._session
|
1090
1110
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1091
1111
|
SqlResultValidator(
|
@@ -1099,8 +1119,9 @@ class TheilSenRegressor(BaseTransformer):
|
|
1099
1119
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1100
1120
|
).validate()
|
1101
1121
|
|
1102
|
-
|
1103
|
-
|
1122
|
+
# Use posixpath to construct stage paths
|
1123
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1124
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1104
1125
|
statement_params = telemetry.get_function_usage_statement_params(
|
1105
1126
|
project=_PROJECT,
|
1106
1127
|
subproject=_SUBPROJECT,
|
@@ -1126,6 +1147,7 @@ class TheilSenRegressor(BaseTransformer):
|
|
1126
1147
|
replace=True,
|
1127
1148
|
session=session,
|
1128
1149
|
statement_params=statement_params,
|
1150
|
+
anonymous=True
|
1129
1151
|
)
|
1130
1152
|
def score_wrapper_sproc(
|
1131
1153
|
session: Session,
|
@@ -1133,7 +1155,8 @@ class TheilSenRegressor(BaseTransformer):
|
|
1133
1155
|
stage_score_file_name: str,
|
1134
1156
|
input_cols: List[str],
|
1135
1157
|
label_cols: List[str],
|
1136
|
-
sample_weight_col: Optional[str]
|
1158
|
+
sample_weight_col: Optional[str],
|
1159
|
+
statement_params: Dict[str, str]
|
1137
1160
|
) -> float:
|
1138
1161
|
import cloudpickle as cp
|
1139
1162
|
import numpy as np
|
@@ -1183,14 +1206,14 @@ class TheilSenRegressor(BaseTransformer):
|
|
1183
1206
|
api_calls=[Session.call],
|
1184
1207
|
custom_tags=dict([("autogen", True)]),
|
1185
1208
|
)
|
1186
|
-
score =
|
1187
|
-
|
1209
|
+
score = score_wrapper_sproc(
|
1210
|
+
session,
|
1188
1211
|
query,
|
1189
1212
|
stage_score_file_name,
|
1190
1213
|
identifier.get_unescaped_names(self.input_cols),
|
1191
1214
|
identifier.get_unescaped_names(self.label_cols),
|
1192
1215
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1193
|
-
statement_params
|
1216
|
+
statement_params,
|
1194
1217
|
)
|
1195
1218
|
|
1196
1219
|
cleanup_temp_files([local_score_file_name])
|
@@ -1208,18 +1231,20 @@ class TheilSenRegressor(BaseTransformer):
|
|
1208
1231
|
if self._sklearn_object._estimator_type == 'classifier':
|
1209
1232
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1210
1233
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1211
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1234
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1235
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1212
1236
|
# For regressor, the type of predict is float64
|
1213
1237
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1214
1238
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1215
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1216
|
-
|
1239
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1240
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1217
1241
|
for prob_func in PROB_FUNCTIONS:
|
1218
1242
|
if hasattr(self, prob_func):
|
1219
1243
|
output_cols_prefix: str = f"{prob_func}_"
|
1220
1244
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1221
1245
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1222
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1246
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1247
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1223
1248
|
|
1224
1249
|
@property
|
1225
1250
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -261,7 +263,6 @@ class TweedieRegressor(BaseTransformer):
|
|
261
263
|
sample_weight_col: Optional[str] = None,
|
262
264
|
) -> None:
|
263
265
|
super().__init__()
|
264
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
265
266
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
266
267
|
|
267
268
|
self._deps = list(deps)
|
@@ -289,6 +290,15 @@ class TweedieRegressor(BaseTransformer):
|
|
289
290
|
self.set_drop_input_cols(drop_input_cols)
|
290
291
|
self.set_sample_weight_col(sample_weight_col)
|
291
292
|
|
293
|
+
def _get_rand_id(self) -> str:
|
294
|
+
"""
|
295
|
+
Generate random id to be used in sproc and stage names.
|
296
|
+
|
297
|
+
Returns:
|
298
|
+
Random id string usable in sproc, table, and stage names.
|
299
|
+
"""
|
300
|
+
return str(uuid4()).replace("-", "_").upper()
|
301
|
+
|
292
302
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
293
303
|
"""
|
294
304
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -367,7 +377,7 @@ class TweedieRegressor(BaseTransformer):
|
|
367
377
|
cp.dump(self._sklearn_object, local_transform_file)
|
368
378
|
|
369
379
|
# Create temp stage to run fit.
|
370
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
380
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
371
381
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
372
382
|
SqlResultValidator(
|
373
383
|
session=session,
|
@@ -380,11 +390,12 @@ class TweedieRegressor(BaseTransformer):
|
|
380
390
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
381
391
|
).validate()
|
382
392
|
|
383
|
-
|
393
|
+
# Use posixpath to construct stage paths
|
394
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
395
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
384
396
|
local_result_file_name = get_temp_file_path()
|
385
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
386
397
|
|
387
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
398
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
388
399
|
statement_params = telemetry.get_function_usage_statement_params(
|
389
400
|
project=_PROJECT,
|
390
401
|
subproject=_SUBPROJECT,
|
@@ -410,6 +421,7 @@ class TweedieRegressor(BaseTransformer):
|
|
410
421
|
replace=True,
|
411
422
|
session=session,
|
412
423
|
statement_params=statement_params,
|
424
|
+
anonymous=True
|
413
425
|
)
|
414
426
|
def fit_wrapper_sproc(
|
415
427
|
session: Session,
|
@@ -418,7 +430,8 @@ class TweedieRegressor(BaseTransformer):
|
|
418
430
|
stage_result_file_name: str,
|
419
431
|
input_cols: List[str],
|
420
432
|
label_cols: List[str],
|
421
|
-
sample_weight_col: Optional[str]
|
433
|
+
sample_weight_col: Optional[str],
|
434
|
+
statement_params: Dict[str, str]
|
422
435
|
) -> str:
|
423
436
|
import cloudpickle as cp
|
424
437
|
import numpy as np
|
@@ -485,15 +498,15 @@ class TweedieRegressor(BaseTransformer):
|
|
485
498
|
api_calls=[Session.call],
|
486
499
|
custom_tags=dict([("autogen", True)]),
|
487
500
|
)
|
488
|
-
sproc_export_file_name =
|
489
|
-
|
501
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
502
|
+
session,
|
490
503
|
query,
|
491
504
|
stage_transform_file_name,
|
492
505
|
stage_result_file_name,
|
493
506
|
identifier.get_unescaped_names(self.input_cols),
|
494
507
|
identifier.get_unescaped_names(self.label_cols),
|
495
508
|
identifier.get_unescaped_names(self.sample_weight_col),
|
496
|
-
statement_params
|
509
|
+
statement_params,
|
497
510
|
)
|
498
511
|
|
499
512
|
if "|" in sproc_export_file_name:
|
@@ -503,7 +516,7 @@ class TweedieRegressor(BaseTransformer):
|
|
503
516
|
print("\n".join(fields[1:]))
|
504
517
|
|
505
518
|
session.file.get(
|
506
|
-
|
519
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
507
520
|
local_result_file_name,
|
508
521
|
statement_params=statement_params
|
509
522
|
)
|
@@ -549,7 +562,7 @@ class TweedieRegressor(BaseTransformer):
|
|
549
562
|
|
550
563
|
# Register vectorized UDF for batch inference
|
551
564
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
552
|
-
safe_id=self.
|
565
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
553
566
|
|
554
567
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
555
568
|
# will try to pickle all of self which fails.
|
@@ -641,7 +654,7 @@ class TweedieRegressor(BaseTransformer):
|
|
641
654
|
return transformed_pandas_df.to_dict("records")
|
642
655
|
|
643
656
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
644
|
-
safe_id=self.
|
657
|
+
safe_id=self._get_rand_id()
|
645
658
|
)
|
646
659
|
|
647
660
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -808,11 +821,18 @@ class TweedieRegressor(BaseTransformer):
|
|
808
821
|
Transformed dataset.
|
809
822
|
"""
|
810
823
|
if isinstance(dataset, DataFrame):
|
824
|
+
expected_type_inferred = "float"
|
825
|
+
# when it is classifier, infer the datatype from label columns
|
826
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
827
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
828
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
829
|
+
)
|
830
|
+
|
811
831
|
output_df = self._batch_inference(
|
812
832
|
dataset=dataset,
|
813
833
|
inference_method="predict",
|
814
834
|
expected_output_cols_list=self.output_cols,
|
815
|
-
expected_output_cols_type=
|
835
|
+
expected_output_cols_type=expected_type_inferred,
|
816
836
|
)
|
817
837
|
elif isinstance(dataset, pd.DataFrame):
|
818
838
|
output_df = self._sklearn_inference(
|
@@ -883,10 +903,10 @@ class TweedieRegressor(BaseTransformer):
|
|
883
903
|
|
884
904
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
885
905
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
886
|
-
Returns
|
906
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
887
907
|
"""
|
888
908
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
889
|
-
return []
|
909
|
+
return [output_cols_prefix]
|
890
910
|
|
891
911
|
classes = self._sklearn_object.classes_
|
892
912
|
if isinstance(classes, numpy.ndarray):
|
@@ -1111,7 +1131,7 @@ class TweedieRegressor(BaseTransformer):
|
|
1111
1131
|
cp.dump(self._sklearn_object, local_score_file)
|
1112
1132
|
|
1113
1133
|
# Create temp stage to run score.
|
1114
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1134
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1115
1135
|
session = dataset._session
|
1116
1136
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1117
1137
|
SqlResultValidator(
|
@@ -1125,8 +1145,9 @@ class TweedieRegressor(BaseTransformer):
|
|
1125
1145
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1126
1146
|
).validate()
|
1127
1147
|
|
1128
|
-
|
1129
|
-
|
1148
|
+
# Use posixpath to construct stage paths
|
1149
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1150
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1130
1151
|
statement_params = telemetry.get_function_usage_statement_params(
|
1131
1152
|
project=_PROJECT,
|
1132
1153
|
subproject=_SUBPROJECT,
|
@@ -1152,6 +1173,7 @@ class TweedieRegressor(BaseTransformer):
|
|
1152
1173
|
replace=True,
|
1153
1174
|
session=session,
|
1154
1175
|
statement_params=statement_params,
|
1176
|
+
anonymous=True
|
1155
1177
|
)
|
1156
1178
|
def score_wrapper_sproc(
|
1157
1179
|
session: Session,
|
@@ -1159,7 +1181,8 @@ class TweedieRegressor(BaseTransformer):
|
|
1159
1181
|
stage_score_file_name: str,
|
1160
1182
|
input_cols: List[str],
|
1161
1183
|
label_cols: List[str],
|
1162
|
-
sample_weight_col: Optional[str]
|
1184
|
+
sample_weight_col: Optional[str],
|
1185
|
+
statement_params: Dict[str, str]
|
1163
1186
|
) -> float:
|
1164
1187
|
import cloudpickle as cp
|
1165
1188
|
import numpy as np
|
@@ -1209,14 +1232,14 @@ class TweedieRegressor(BaseTransformer):
|
|
1209
1232
|
api_calls=[Session.call],
|
1210
1233
|
custom_tags=dict([("autogen", True)]),
|
1211
1234
|
)
|
1212
|
-
score =
|
1213
|
-
|
1235
|
+
score = score_wrapper_sproc(
|
1236
|
+
session,
|
1214
1237
|
query,
|
1215
1238
|
stage_score_file_name,
|
1216
1239
|
identifier.get_unescaped_names(self.input_cols),
|
1217
1240
|
identifier.get_unescaped_names(self.label_cols),
|
1218
1241
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1219
|
-
statement_params
|
1242
|
+
statement_params,
|
1220
1243
|
)
|
1221
1244
|
|
1222
1245
|
cleanup_temp_files([local_score_file_name])
|
@@ -1234,18 +1257,20 @@ class TweedieRegressor(BaseTransformer):
|
|
1234
1257
|
if self._sklearn_object._estimator_type == 'classifier':
|
1235
1258
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1236
1259
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1237
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1260
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1261
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1238
1262
|
# For regressor, the type of predict is float64
|
1239
1263
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1240
1264
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1241
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1242
|
-
|
1265
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1266
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1243
1267
|
for prob_func in PROB_FUNCTIONS:
|
1244
1268
|
if hasattr(self, prob_func):
|
1245
1269
|
output_cols_prefix: str = f"{prob_func}_"
|
1246
1270
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1247
1271
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1248
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1272
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1273
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1249
1274
|
|
1250
1275
|
@property
|
1251
1276
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|