snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -203,7 +205,6 @@ class LinearRegression(BaseTransformer):
|
|
203
205
|
sample_weight_col: Optional[str] = None,
|
204
206
|
) -> None:
|
205
207
|
super().__init__()
|
206
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
207
208
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
208
209
|
|
209
210
|
self._deps = list(deps)
|
@@ -226,6 +227,15 @@ class LinearRegression(BaseTransformer):
|
|
226
227
|
self.set_drop_input_cols(drop_input_cols)
|
227
228
|
self.set_sample_weight_col(sample_weight_col)
|
228
229
|
|
230
|
+
def _get_rand_id(self) -> str:
|
231
|
+
"""
|
232
|
+
Generate random id to be used in sproc and stage names.
|
233
|
+
|
234
|
+
Returns:
|
235
|
+
Random id string usable in sproc, table, and stage names.
|
236
|
+
"""
|
237
|
+
return str(uuid4()).replace("-", "_").upper()
|
238
|
+
|
229
239
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
230
240
|
"""
|
231
241
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -304,7 +314,7 @@ class LinearRegression(BaseTransformer):
|
|
304
314
|
cp.dump(self._sklearn_object, local_transform_file)
|
305
315
|
|
306
316
|
# Create temp stage to run fit.
|
307
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
317
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
308
318
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
309
319
|
SqlResultValidator(
|
310
320
|
session=session,
|
@@ -317,11 +327,12 @@ class LinearRegression(BaseTransformer):
|
|
317
327
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
318
328
|
).validate()
|
319
329
|
|
320
|
-
|
330
|
+
# Use posixpath to construct stage paths
|
331
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
332
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
321
333
|
local_result_file_name = get_temp_file_path()
|
322
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
323
334
|
|
324
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
335
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
325
336
|
statement_params = telemetry.get_function_usage_statement_params(
|
326
337
|
project=_PROJECT,
|
327
338
|
subproject=_SUBPROJECT,
|
@@ -347,6 +358,7 @@ class LinearRegression(BaseTransformer):
|
|
347
358
|
replace=True,
|
348
359
|
session=session,
|
349
360
|
statement_params=statement_params,
|
361
|
+
anonymous=True
|
350
362
|
)
|
351
363
|
def fit_wrapper_sproc(
|
352
364
|
session: Session,
|
@@ -355,7 +367,8 @@ class LinearRegression(BaseTransformer):
|
|
355
367
|
stage_result_file_name: str,
|
356
368
|
input_cols: List[str],
|
357
369
|
label_cols: List[str],
|
358
|
-
sample_weight_col: Optional[str]
|
370
|
+
sample_weight_col: Optional[str],
|
371
|
+
statement_params: Dict[str, str]
|
359
372
|
) -> str:
|
360
373
|
import cloudpickle as cp
|
361
374
|
import numpy as np
|
@@ -422,15 +435,15 @@ class LinearRegression(BaseTransformer):
|
|
422
435
|
api_calls=[Session.call],
|
423
436
|
custom_tags=dict([("autogen", True)]),
|
424
437
|
)
|
425
|
-
sproc_export_file_name =
|
426
|
-
|
438
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
439
|
+
session,
|
427
440
|
query,
|
428
441
|
stage_transform_file_name,
|
429
442
|
stage_result_file_name,
|
430
443
|
identifier.get_unescaped_names(self.input_cols),
|
431
444
|
identifier.get_unescaped_names(self.label_cols),
|
432
445
|
identifier.get_unescaped_names(self.sample_weight_col),
|
433
|
-
statement_params
|
446
|
+
statement_params,
|
434
447
|
)
|
435
448
|
|
436
449
|
if "|" in sproc_export_file_name:
|
@@ -440,7 +453,7 @@ class LinearRegression(BaseTransformer):
|
|
440
453
|
print("\n".join(fields[1:]))
|
441
454
|
|
442
455
|
session.file.get(
|
443
|
-
|
456
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
444
457
|
local_result_file_name,
|
445
458
|
statement_params=statement_params
|
446
459
|
)
|
@@ -486,7 +499,7 @@ class LinearRegression(BaseTransformer):
|
|
486
499
|
|
487
500
|
# Register vectorized UDF for batch inference
|
488
501
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
489
|
-
safe_id=self.id, method=inference_method)
|
502
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
490
503
|
|
491
504
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
492
505
|
# will try to pickle all of self which fails.
|
@@ -578,7 +591,7 @@ class LinearRegression(BaseTransformer):
|
|
578
591
|
return transformed_pandas_df.to_dict("records")
|
579
592
|
|
580
593
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
581
|
-
safe_id=self.id
|
594
|
+
safe_id=self._get_rand_id()
|
582
595
|
)
|
583
596
|
|
584
597
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -745,11 +758,18 @@ class LinearRegression(BaseTransformer):
|
|
745
758
|
Transformed dataset.
|
746
759
|
"""
|
747
760
|
if isinstance(dataset, DataFrame):
|
761
|
+
expected_type_inferred = "float"
|
762
|
+
# when it is classifier, infer the datatype from label columns
|
763
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
764
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
765
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
766
|
+
)
|
767
|
+
|
748
768
|
output_df = self._batch_inference(
|
749
769
|
dataset=dataset,
|
750
770
|
inference_method="predict",
|
751
771
|
expected_output_cols_list=self.output_cols,
|
752
|
-
expected_output_cols_type=
|
772
|
+
expected_output_cols_type=expected_type_inferred,
|
753
773
|
)
|
754
774
|
elif isinstance(dataset, pd.DataFrame):
|
755
775
|
output_df = self._sklearn_inference(
|
@@ -820,10 +840,10 @@ class LinearRegression(BaseTransformer):
|
|
820
840
|
|
821
841
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
822
842
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
823
|
-
Returns
|
843
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
824
844
|
"""
|
825
845
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
826
|
-
return []
|
846
|
+
return [output_cols_prefix]
|
827
847
|
|
828
848
|
classes = self._sklearn_object.classes_
|
829
849
|
if isinstance(classes, numpy.ndarray):
|
@@ -1048,7 +1068,7 @@ class LinearRegression(BaseTransformer):
|
|
1048
1068
|
cp.dump(self._sklearn_object, local_score_file)
|
1049
1069
|
|
1050
1070
|
# Create temp stage to run score.
|
1051
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1071
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1052
1072
|
session = dataset._session
|
1053
1073
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1054
1074
|
SqlResultValidator(
|
@@ -1062,8 +1082,9 @@ class LinearRegression(BaseTransformer):
|
|
1062
1082
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1063
1083
|
).validate()
|
1064
1084
|
|
1065
|
-
|
1066
|
-
|
1085
|
+
# Use posixpath to construct stage paths
|
1086
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1087
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1067
1088
|
statement_params = telemetry.get_function_usage_statement_params(
|
1068
1089
|
project=_PROJECT,
|
1069
1090
|
subproject=_SUBPROJECT,
|
@@ -1089,6 +1110,7 @@ class LinearRegression(BaseTransformer):
|
|
1089
1110
|
replace=True,
|
1090
1111
|
session=session,
|
1091
1112
|
statement_params=statement_params,
|
1113
|
+
anonymous=True
|
1092
1114
|
)
|
1093
1115
|
def score_wrapper_sproc(
|
1094
1116
|
session: Session,
|
@@ -1096,7 +1118,8 @@ class LinearRegression(BaseTransformer):
|
|
1096
1118
|
stage_score_file_name: str,
|
1097
1119
|
input_cols: List[str],
|
1098
1120
|
label_cols: List[str],
|
1099
|
-
sample_weight_col: Optional[str]
|
1121
|
+
sample_weight_col: Optional[str],
|
1122
|
+
statement_params: Dict[str, str]
|
1100
1123
|
) -> float:
|
1101
1124
|
import cloudpickle as cp
|
1102
1125
|
import numpy as np
|
@@ -1146,14 +1169,14 @@ class LinearRegression(BaseTransformer):
|
|
1146
1169
|
api_calls=[Session.call],
|
1147
1170
|
custom_tags=dict([("autogen", True)]),
|
1148
1171
|
)
|
1149
|
-
score =
|
1150
|
-
|
1172
|
+
score = score_wrapper_sproc(
|
1173
|
+
session,
|
1151
1174
|
query,
|
1152
1175
|
stage_score_file_name,
|
1153
1176
|
identifier.get_unescaped_names(self.input_cols),
|
1154
1177
|
identifier.get_unescaped_names(self.label_cols),
|
1155
1178
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1156
|
-
statement_params
|
1179
|
+
statement_params,
|
1157
1180
|
)
|
1158
1181
|
|
1159
1182
|
cleanup_temp_files([local_score_file_name])
|
@@ -1171,18 +1194,20 @@ class LinearRegression(BaseTransformer):
|
|
1171
1194
|
if self._sklearn_object._estimator_type == 'classifier':
|
1172
1195
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1173
1196
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1174
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1197
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1198
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1175
1199
|
# For regressor, the type of predict is float64
|
1176
1200
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1177
1201
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1178
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1179
|
-
|
1202
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1203
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1180
1204
|
for prob_func in PROB_FUNCTIONS:
|
1181
1205
|
if hasattr(self, prob_func):
|
1182
1206
|
output_cols_prefix: str = f"{prob_func}_"
|
1183
1207
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1184
1208
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1185
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1209
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1210
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1186
1211
|
|
1187
1212
|
@property
|
1188
1213
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -306,7 +308,6 @@ class LogisticRegression(BaseTransformer):
|
|
306
308
|
sample_weight_col: Optional[str] = None,
|
307
309
|
) -> None:
|
308
310
|
super().__init__()
|
309
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
310
311
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
311
312
|
|
312
313
|
self._deps = list(deps)
|
@@ -340,6 +341,15 @@ class LogisticRegression(BaseTransformer):
|
|
340
341
|
self.set_drop_input_cols(drop_input_cols)
|
341
342
|
self.set_sample_weight_col(sample_weight_col)
|
342
343
|
|
344
|
+
def _get_rand_id(self) -> str:
|
345
|
+
"""
|
346
|
+
Generate random id to be used in sproc and stage names.
|
347
|
+
|
348
|
+
Returns:
|
349
|
+
Random id string usable in sproc, table, and stage names.
|
350
|
+
"""
|
351
|
+
return str(uuid4()).replace("-", "_").upper()
|
352
|
+
|
343
353
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
344
354
|
"""
|
345
355
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -418,7 +428,7 @@ class LogisticRegression(BaseTransformer):
|
|
418
428
|
cp.dump(self._sklearn_object, local_transform_file)
|
419
429
|
|
420
430
|
# Create temp stage to run fit.
|
421
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
431
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
422
432
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
423
433
|
SqlResultValidator(
|
424
434
|
session=session,
|
@@ -431,11 +441,12 @@ class LogisticRegression(BaseTransformer):
|
|
431
441
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
432
442
|
).validate()
|
433
443
|
|
434
|
-
|
444
|
+
# Use posixpath to construct stage paths
|
445
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
446
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
435
447
|
local_result_file_name = get_temp_file_path()
|
436
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
437
448
|
|
438
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
449
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
439
450
|
statement_params = telemetry.get_function_usage_statement_params(
|
440
451
|
project=_PROJECT,
|
441
452
|
subproject=_SUBPROJECT,
|
@@ -461,6 +472,7 @@ class LogisticRegression(BaseTransformer):
|
|
461
472
|
replace=True,
|
462
473
|
session=session,
|
463
474
|
statement_params=statement_params,
|
475
|
+
anonymous=True
|
464
476
|
)
|
465
477
|
def fit_wrapper_sproc(
|
466
478
|
session: Session,
|
@@ -469,7 +481,8 @@ class LogisticRegression(BaseTransformer):
|
|
469
481
|
stage_result_file_name: str,
|
470
482
|
input_cols: List[str],
|
471
483
|
label_cols: List[str],
|
472
|
-
sample_weight_col: Optional[str]
|
484
|
+
sample_weight_col: Optional[str],
|
485
|
+
statement_params: Dict[str, str]
|
473
486
|
) -> str:
|
474
487
|
import cloudpickle as cp
|
475
488
|
import numpy as np
|
@@ -536,15 +549,15 @@ class LogisticRegression(BaseTransformer):
|
|
536
549
|
api_calls=[Session.call],
|
537
550
|
custom_tags=dict([("autogen", True)]),
|
538
551
|
)
|
539
|
-
sproc_export_file_name =
|
540
|
-
|
552
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
553
|
+
session,
|
541
554
|
query,
|
542
555
|
stage_transform_file_name,
|
543
556
|
stage_result_file_name,
|
544
557
|
identifier.get_unescaped_names(self.input_cols),
|
545
558
|
identifier.get_unescaped_names(self.label_cols),
|
546
559
|
identifier.get_unescaped_names(self.sample_weight_col),
|
547
|
-
statement_params
|
560
|
+
statement_params,
|
548
561
|
)
|
549
562
|
|
550
563
|
if "|" in sproc_export_file_name:
|
@@ -554,7 +567,7 @@ class LogisticRegression(BaseTransformer):
|
|
554
567
|
print("\n".join(fields[1:]))
|
555
568
|
|
556
569
|
session.file.get(
|
557
|
-
|
570
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
558
571
|
local_result_file_name,
|
559
572
|
statement_params=statement_params
|
560
573
|
)
|
@@ -600,7 +613,7 @@ class LogisticRegression(BaseTransformer):
|
|
600
613
|
|
601
614
|
# Register vectorized UDF for batch inference
|
602
615
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
603
|
-
safe_id=self.id, method=inference_method)
|
616
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
604
617
|
|
605
618
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
606
619
|
# will try to pickle all of self which fails.
|
@@ -692,7 +705,7 @@ class LogisticRegression(BaseTransformer):
|
|
692
705
|
return transformed_pandas_df.to_dict("records")
|
693
706
|
|
694
707
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
695
|
-
safe_id=self.id
|
708
|
+
safe_id=self._get_rand_id()
|
696
709
|
)
|
697
710
|
|
698
711
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -859,11 +872,18 @@ class LogisticRegression(BaseTransformer):
|
|
859
872
|
Transformed dataset.
|
860
873
|
"""
|
861
874
|
if isinstance(dataset, DataFrame):
|
875
|
+
expected_type_inferred = ""
|
876
|
+
# when it is classifier, infer the datatype from label columns
|
877
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
878
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
879
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
880
|
+
)
|
881
|
+
|
862
882
|
output_df = self._batch_inference(
|
863
883
|
dataset=dataset,
|
864
884
|
inference_method="predict",
|
865
885
|
expected_output_cols_list=self.output_cols,
|
866
|
-
expected_output_cols_type=
|
886
|
+
expected_output_cols_type=expected_type_inferred,
|
867
887
|
)
|
868
888
|
elif isinstance(dataset, pd.DataFrame):
|
869
889
|
output_df = self._sklearn_inference(
|
@@ -934,10 +954,10 @@ class LogisticRegression(BaseTransformer):
|
|
934
954
|
|
935
955
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
936
956
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
937
|
-
Returns
|
957
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
938
958
|
"""
|
939
959
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
940
|
-
return []
|
960
|
+
return [output_cols_prefix]
|
941
961
|
|
942
962
|
classes = self._sklearn_object.classes_
|
943
963
|
if isinstance(classes, numpy.ndarray):
|
@@ -1168,7 +1188,7 @@ class LogisticRegression(BaseTransformer):
|
|
1168
1188
|
cp.dump(self._sklearn_object, local_score_file)
|
1169
1189
|
|
1170
1190
|
# Create temp stage to run score.
|
1171
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1191
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1172
1192
|
session = dataset._session
|
1173
1193
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1174
1194
|
SqlResultValidator(
|
@@ -1182,8 +1202,9 @@ class LogisticRegression(BaseTransformer):
|
|
1182
1202
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1183
1203
|
).validate()
|
1184
1204
|
|
1185
|
-
|
1186
|
-
|
1205
|
+
# Use posixpath to construct stage paths
|
1206
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1207
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1187
1208
|
statement_params = telemetry.get_function_usage_statement_params(
|
1188
1209
|
project=_PROJECT,
|
1189
1210
|
subproject=_SUBPROJECT,
|
@@ -1209,6 +1230,7 @@ class LogisticRegression(BaseTransformer):
|
|
1209
1230
|
replace=True,
|
1210
1231
|
session=session,
|
1211
1232
|
statement_params=statement_params,
|
1233
|
+
anonymous=True
|
1212
1234
|
)
|
1213
1235
|
def score_wrapper_sproc(
|
1214
1236
|
session: Session,
|
@@ -1216,7 +1238,8 @@ class LogisticRegression(BaseTransformer):
|
|
1216
1238
|
stage_score_file_name: str,
|
1217
1239
|
input_cols: List[str],
|
1218
1240
|
label_cols: List[str],
|
1219
|
-
sample_weight_col: Optional[str]
|
1241
|
+
sample_weight_col: Optional[str],
|
1242
|
+
statement_params: Dict[str, str]
|
1220
1243
|
) -> float:
|
1221
1244
|
import cloudpickle as cp
|
1222
1245
|
import numpy as np
|
@@ -1266,14 +1289,14 @@ class LogisticRegression(BaseTransformer):
|
|
1266
1289
|
api_calls=[Session.call],
|
1267
1290
|
custom_tags=dict([("autogen", True)]),
|
1268
1291
|
)
|
1269
|
-
score =
|
1270
|
-
|
1292
|
+
score = score_wrapper_sproc(
|
1293
|
+
session,
|
1271
1294
|
query,
|
1272
1295
|
stage_score_file_name,
|
1273
1296
|
identifier.get_unescaped_names(self.input_cols),
|
1274
1297
|
identifier.get_unescaped_names(self.label_cols),
|
1275
1298
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1276
|
-
statement_params
|
1299
|
+
statement_params,
|
1277
1300
|
)
|
1278
1301
|
|
1279
1302
|
cleanup_temp_files([local_score_file_name])
|
@@ -1291,18 +1314,20 @@ class LogisticRegression(BaseTransformer):
|
|
1291
1314
|
if self._sklearn_object._estimator_type == 'classifier':
|
1292
1315
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1293
1316
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1294
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1317
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1318
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1295
1319
|
# For regressor, the type of predict is float64
|
1296
1320
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1297
1321
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1298
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1299
|
-
|
1322
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1323
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1300
1324
|
for prob_func in PROB_FUNCTIONS:
|
1301
1325
|
if hasattr(self, prob_func):
|
1302
1326
|
output_cols_prefix: str = f"{prob_func}_"
|
1303
1327
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1304
1328
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1305
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1329
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1330
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1306
1331
|
|
1307
1332
|
@property
|
1308
1333
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|