snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -219,7 +221,6 @@ class GraphicalLasso(BaseTransformer):
|
|
219
221
|
sample_weight_col: Optional[str] = None,
|
220
222
|
) -> None:
|
221
223
|
super().__init__()
|
222
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
223
224
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
224
225
|
|
225
226
|
self._deps = list(deps)
|
@@ -245,6 +246,15 @@ class GraphicalLasso(BaseTransformer):
|
|
245
246
|
self.set_drop_input_cols(drop_input_cols)
|
246
247
|
self.set_sample_weight_col(sample_weight_col)
|
247
248
|
|
249
|
+
def _get_rand_id(self) -> str:
|
250
|
+
"""
|
251
|
+
Generate random id to be used in sproc and stage names.
|
252
|
+
|
253
|
+
Returns:
|
254
|
+
Random id string usable in sproc, table, and stage names.
|
255
|
+
"""
|
256
|
+
return str(uuid4()).replace("-", "_").upper()
|
257
|
+
|
248
258
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
249
259
|
"""
|
250
260
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -323,7 +333,7 @@ class GraphicalLasso(BaseTransformer):
|
|
323
333
|
cp.dump(self._sklearn_object, local_transform_file)
|
324
334
|
|
325
335
|
# Create temp stage to run fit.
|
326
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
336
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
327
337
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
328
338
|
SqlResultValidator(
|
329
339
|
session=session,
|
@@ -336,11 +346,12 @@ class GraphicalLasso(BaseTransformer):
|
|
336
346
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
337
347
|
).validate()
|
338
348
|
|
339
|
-
|
349
|
+
# Use posixpath to construct stage paths
|
350
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
351
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
340
352
|
local_result_file_name = get_temp_file_path()
|
341
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
342
353
|
|
343
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
354
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
344
355
|
statement_params = telemetry.get_function_usage_statement_params(
|
345
356
|
project=_PROJECT,
|
346
357
|
subproject=_SUBPROJECT,
|
@@ -366,6 +377,7 @@ class GraphicalLasso(BaseTransformer):
|
|
366
377
|
replace=True,
|
367
378
|
session=session,
|
368
379
|
statement_params=statement_params,
|
380
|
+
anonymous=True
|
369
381
|
)
|
370
382
|
def fit_wrapper_sproc(
|
371
383
|
session: Session,
|
@@ -374,7 +386,8 @@ class GraphicalLasso(BaseTransformer):
|
|
374
386
|
stage_result_file_name: str,
|
375
387
|
input_cols: List[str],
|
376
388
|
label_cols: List[str],
|
377
|
-
sample_weight_col: Optional[str]
|
389
|
+
sample_weight_col: Optional[str],
|
390
|
+
statement_params: Dict[str, str]
|
378
391
|
) -> str:
|
379
392
|
import cloudpickle as cp
|
380
393
|
import numpy as np
|
@@ -441,15 +454,15 @@ class GraphicalLasso(BaseTransformer):
|
|
441
454
|
api_calls=[Session.call],
|
442
455
|
custom_tags=dict([("autogen", True)]),
|
443
456
|
)
|
444
|
-
sproc_export_file_name =
|
445
|
-
|
457
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
458
|
+
session,
|
446
459
|
query,
|
447
460
|
stage_transform_file_name,
|
448
461
|
stage_result_file_name,
|
449
462
|
identifier.get_unescaped_names(self.input_cols),
|
450
463
|
identifier.get_unescaped_names(self.label_cols),
|
451
464
|
identifier.get_unescaped_names(self.sample_weight_col),
|
452
|
-
statement_params
|
465
|
+
statement_params,
|
453
466
|
)
|
454
467
|
|
455
468
|
if "|" in sproc_export_file_name:
|
@@ -459,7 +472,7 @@ class GraphicalLasso(BaseTransformer):
|
|
459
472
|
print("\n".join(fields[1:]))
|
460
473
|
|
461
474
|
session.file.get(
|
462
|
-
|
475
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
463
476
|
local_result_file_name,
|
464
477
|
statement_params=statement_params
|
465
478
|
)
|
@@ -505,7 +518,7 @@ class GraphicalLasso(BaseTransformer):
|
|
505
518
|
|
506
519
|
# Register vectorized UDF for batch inference
|
507
520
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
508
|
-
safe_id=self.
|
521
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
509
522
|
|
510
523
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
511
524
|
# will try to pickle all of self which fails.
|
@@ -597,7 +610,7 @@ class GraphicalLasso(BaseTransformer):
|
|
597
610
|
return transformed_pandas_df.to_dict("records")
|
598
611
|
|
599
612
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
600
|
-
safe_id=self.
|
613
|
+
safe_id=self._get_rand_id()
|
601
614
|
)
|
602
615
|
|
603
616
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -762,11 +775,18 @@ class GraphicalLasso(BaseTransformer):
|
|
762
775
|
Transformed dataset.
|
763
776
|
"""
|
764
777
|
if isinstance(dataset, DataFrame):
|
778
|
+
expected_type_inferred = ""
|
779
|
+
# when it is classifier, infer the datatype from label columns
|
780
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
781
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
782
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
783
|
+
)
|
784
|
+
|
765
785
|
output_df = self._batch_inference(
|
766
786
|
dataset=dataset,
|
767
787
|
inference_method="predict",
|
768
788
|
expected_output_cols_list=self.output_cols,
|
769
|
-
expected_output_cols_type=
|
789
|
+
expected_output_cols_type=expected_type_inferred,
|
770
790
|
)
|
771
791
|
elif isinstance(dataset, pd.DataFrame):
|
772
792
|
output_df = self._sklearn_inference(
|
@@ -837,10 +857,10 @@ class GraphicalLasso(BaseTransformer):
|
|
837
857
|
|
838
858
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
839
859
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
840
|
-
Returns
|
860
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
841
861
|
"""
|
842
862
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
843
|
-
return []
|
863
|
+
return [output_cols_prefix]
|
844
864
|
|
845
865
|
classes = self._sklearn_object.classes_
|
846
866
|
if isinstance(classes, numpy.ndarray):
|
@@ -1065,7 +1085,7 @@ class GraphicalLasso(BaseTransformer):
|
|
1065
1085
|
cp.dump(self._sklearn_object, local_score_file)
|
1066
1086
|
|
1067
1087
|
# Create temp stage to run score.
|
1068
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1088
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1069
1089
|
session = dataset._session
|
1070
1090
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1071
1091
|
SqlResultValidator(
|
@@ -1079,8 +1099,9 @@ class GraphicalLasso(BaseTransformer):
|
|
1079
1099
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1080
1100
|
).validate()
|
1081
1101
|
|
1082
|
-
|
1083
|
-
|
1102
|
+
# Use posixpath to construct stage paths
|
1103
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1104
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1084
1105
|
statement_params = telemetry.get_function_usage_statement_params(
|
1085
1106
|
project=_PROJECT,
|
1086
1107
|
subproject=_SUBPROJECT,
|
@@ -1106,6 +1127,7 @@ class GraphicalLasso(BaseTransformer):
|
|
1106
1127
|
replace=True,
|
1107
1128
|
session=session,
|
1108
1129
|
statement_params=statement_params,
|
1130
|
+
anonymous=True
|
1109
1131
|
)
|
1110
1132
|
def score_wrapper_sproc(
|
1111
1133
|
session: Session,
|
@@ -1113,7 +1135,8 @@ class GraphicalLasso(BaseTransformer):
|
|
1113
1135
|
stage_score_file_name: str,
|
1114
1136
|
input_cols: List[str],
|
1115
1137
|
label_cols: List[str],
|
1116
|
-
sample_weight_col: Optional[str]
|
1138
|
+
sample_weight_col: Optional[str],
|
1139
|
+
statement_params: Dict[str, str]
|
1117
1140
|
) -> float:
|
1118
1141
|
import cloudpickle as cp
|
1119
1142
|
import numpy as np
|
@@ -1163,14 +1186,14 @@ class GraphicalLasso(BaseTransformer):
|
|
1163
1186
|
api_calls=[Session.call],
|
1164
1187
|
custom_tags=dict([("autogen", True)]),
|
1165
1188
|
)
|
1166
|
-
score =
|
1167
|
-
|
1189
|
+
score = score_wrapper_sproc(
|
1190
|
+
session,
|
1168
1191
|
query,
|
1169
1192
|
stage_score_file_name,
|
1170
1193
|
identifier.get_unescaped_names(self.input_cols),
|
1171
1194
|
identifier.get_unescaped_names(self.label_cols),
|
1172
1195
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1173
|
-
statement_params
|
1196
|
+
statement_params,
|
1174
1197
|
)
|
1175
1198
|
|
1176
1199
|
cleanup_temp_files([local_score_file_name])
|
@@ -1188,18 +1211,20 @@ class GraphicalLasso(BaseTransformer):
|
|
1188
1211
|
if self._sklearn_object._estimator_type == 'classifier':
|
1189
1212
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1190
1213
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1191
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1214
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1215
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1192
1216
|
# For regressor, the type of predict is float64
|
1193
1217
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1194
1218
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1195
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1196
|
-
|
1219
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1220
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1197
1221
|
for prob_func in PROB_FUNCTIONS:
|
1198
1222
|
if hasattr(self, prob_func):
|
1199
1223
|
output_cols_prefix: str = f"{prob_func}_"
|
1200
1224
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1201
1225
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1202
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1226
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1227
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1203
1228
|
|
1204
1229
|
@property
|
1205
1230
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -249,7 +251,6 @@ class GraphicalLassoCV(BaseTransformer):
|
|
249
251
|
sample_weight_col: Optional[str] = None,
|
250
252
|
) -> None:
|
251
253
|
super().__init__()
|
252
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
253
254
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
254
255
|
|
255
256
|
self._deps = list(deps)
|
@@ -278,6 +279,15 @@ class GraphicalLassoCV(BaseTransformer):
|
|
278
279
|
self.set_drop_input_cols(drop_input_cols)
|
279
280
|
self.set_sample_weight_col(sample_weight_col)
|
280
281
|
|
282
|
+
def _get_rand_id(self) -> str:
|
283
|
+
"""
|
284
|
+
Generate random id to be used in sproc and stage names.
|
285
|
+
|
286
|
+
Returns:
|
287
|
+
Random id string usable in sproc, table, and stage names.
|
288
|
+
"""
|
289
|
+
return str(uuid4()).replace("-", "_").upper()
|
290
|
+
|
281
291
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
282
292
|
"""
|
283
293
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -356,7 +366,7 @@ class GraphicalLassoCV(BaseTransformer):
|
|
356
366
|
cp.dump(self._sklearn_object, local_transform_file)
|
357
367
|
|
358
368
|
# Create temp stage to run fit.
|
359
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
369
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
360
370
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
361
371
|
SqlResultValidator(
|
362
372
|
session=session,
|
@@ -369,11 +379,12 @@ class GraphicalLassoCV(BaseTransformer):
|
|
369
379
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
370
380
|
).validate()
|
371
381
|
|
372
|
-
|
382
|
+
# Use posixpath to construct stage paths
|
383
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
384
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
373
385
|
local_result_file_name = get_temp_file_path()
|
374
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
375
386
|
|
376
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
387
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
377
388
|
statement_params = telemetry.get_function_usage_statement_params(
|
378
389
|
project=_PROJECT,
|
379
390
|
subproject=_SUBPROJECT,
|
@@ -399,6 +410,7 @@ class GraphicalLassoCV(BaseTransformer):
|
|
399
410
|
replace=True,
|
400
411
|
session=session,
|
401
412
|
statement_params=statement_params,
|
413
|
+
anonymous=True
|
402
414
|
)
|
403
415
|
def fit_wrapper_sproc(
|
404
416
|
session: Session,
|
@@ -407,7 +419,8 @@ class GraphicalLassoCV(BaseTransformer):
|
|
407
419
|
stage_result_file_name: str,
|
408
420
|
input_cols: List[str],
|
409
421
|
label_cols: List[str],
|
410
|
-
sample_weight_col: Optional[str]
|
422
|
+
sample_weight_col: Optional[str],
|
423
|
+
statement_params: Dict[str, str]
|
411
424
|
) -> str:
|
412
425
|
import cloudpickle as cp
|
413
426
|
import numpy as np
|
@@ -474,15 +487,15 @@ class GraphicalLassoCV(BaseTransformer):
|
|
474
487
|
api_calls=[Session.call],
|
475
488
|
custom_tags=dict([("autogen", True)]),
|
476
489
|
)
|
477
|
-
sproc_export_file_name =
|
478
|
-
|
490
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
491
|
+
session,
|
479
492
|
query,
|
480
493
|
stage_transform_file_name,
|
481
494
|
stage_result_file_name,
|
482
495
|
identifier.get_unescaped_names(self.input_cols),
|
483
496
|
identifier.get_unescaped_names(self.label_cols),
|
484
497
|
identifier.get_unescaped_names(self.sample_weight_col),
|
485
|
-
statement_params
|
498
|
+
statement_params,
|
486
499
|
)
|
487
500
|
|
488
501
|
if "|" in sproc_export_file_name:
|
@@ -492,7 +505,7 @@ class GraphicalLassoCV(BaseTransformer):
|
|
492
505
|
print("\n".join(fields[1:]))
|
493
506
|
|
494
507
|
session.file.get(
|
495
|
-
|
508
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
496
509
|
local_result_file_name,
|
497
510
|
statement_params=statement_params
|
498
511
|
)
|
@@ -538,7 +551,7 @@ class GraphicalLassoCV(BaseTransformer):
|
|
538
551
|
|
539
552
|
# Register vectorized UDF for batch inference
|
540
553
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
541
|
-
safe_id=self.
|
554
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
542
555
|
|
543
556
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
544
557
|
# will try to pickle all of self which fails.
|
@@ -630,7 +643,7 @@ class GraphicalLassoCV(BaseTransformer):
|
|
630
643
|
return transformed_pandas_df.to_dict("records")
|
631
644
|
|
632
645
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
633
|
-
safe_id=self.
|
646
|
+
safe_id=self._get_rand_id()
|
634
647
|
)
|
635
648
|
|
636
649
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -795,11 +808,18 @@ class GraphicalLassoCV(BaseTransformer):
|
|
795
808
|
Transformed dataset.
|
796
809
|
"""
|
797
810
|
if isinstance(dataset, DataFrame):
|
811
|
+
expected_type_inferred = ""
|
812
|
+
# when it is classifier, infer the datatype from label columns
|
813
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
814
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
815
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
816
|
+
)
|
817
|
+
|
798
818
|
output_df = self._batch_inference(
|
799
819
|
dataset=dataset,
|
800
820
|
inference_method="predict",
|
801
821
|
expected_output_cols_list=self.output_cols,
|
802
|
-
expected_output_cols_type=
|
822
|
+
expected_output_cols_type=expected_type_inferred,
|
803
823
|
)
|
804
824
|
elif isinstance(dataset, pd.DataFrame):
|
805
825
|
output_df = self._sklearn_inference(
|
@@ -870,10 +890,10 @@ class GraphicalLassoCV(BaseTransformer):
|
|
870
890
|
|
871
891
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
872
892
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
873
|
-
Returns
|
893
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
874
894
|
"""
|
875
895
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
876
|
-
return []
|
896
|
+
return [output_cols_prefix]
|
877
897
|
|
878
898
|
classes = self._sklearn_object.classes_
|
879
899
|
if isinstance(classes, numpy.ndarray):
|
@@ -1098,7 +1118,7 @@ class GraphicalLassoCV(BaseTransformer):
|
|
1098
1118
|
cp.dump(self._sklearn_object, local_score_file)
|
1099
1119
|
|
1100
1120
|
# Create temp stage to run score.
|
1101
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1121
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1102
1122
|
session = dataset._session
|
1103
1123
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1104
1124
|
SqlResultValidator(
|
@@ -1112,8 +1132,9 @@ class GraphicalLassoCV(BaseTransformer):
|
|
1112
1132
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1113
1133
|
).validate()
|
1114
1134
|
|
1115
|
-
|
1116
|
-
|
1135
|
+
# Use posixpath to construct stage paths
|
1136
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1137
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1117
1138
|
statement_params = telemetry.get_function_usage_statement_params(
|
1118
1139
|
project=_PROJECT,
|
1119
1140
|
subproject=_SUBPROJECT,
|
@@ -1139,6 +1160,7 @@ class GraphicalLassoCV(BaseTransformer):
|
|
1139
1160
|
replace=True,
|
1140
1161
|
session=session,
|
1141
1162
|
statement_params=statement_params,
|
1163
|
+
anonymous=True
|
1142
1164
|
)
|
1143
1165
|
def score_wrapper_sproc(
|
1144
1166
|
session: Session,
|
@@ -1146,7 +1168,8 @@ class GraphicalLassoCV(BaseTransformer):
|
|
1146
1168
|
stage_score_file_name: str,
|
1147
1169
|
input_cols: List[str],
|
1148
1170
|
label_cols: List[str],
|
1149
|
-
sample_weight_col: Optional[str]
|
1171
|
+
sample_weight_col: Optional[str],
|
1172
|
+
statement_params: Dict[str, str]
|
1150
1173
|
) -> float:
|
1151
1174
|
import cloudpickle as cp
|
1152
1175
|
import numpy as np
|
@@ -1196,14 +1219,14 @@ class GraphicalLassoCV(BaseTransformer):
|
|
1196
1219
|
api_calls=[Session.call],
|
1197
1220
|
custom_tags=dict([("autogen", True)]),
|
1198
1221
|
)
|
1199
|
-
score =
|
1200
|
-
|
1222
|
+
score = score_wrapper_sproc(
|
1223
|
+
session,
|
1201
1224
|
query,
|
1202
1225
|
stage_score_file_name,
|
1203
1226
|
identifier.get_unescaped_names(self.input_cols),
|
1204
1227
|
identifier.get_unescaped_names(self.label_cols),
|
1205
1228
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1206
|
-
statement_params
|
1229
|
+
statement_params,
|
1207
1230
|
)
|
1208
1231
|
|
1209
1232
|
cleanup_temp_files([local_score_file_name])
|
@@ -1221,18 +1244,20 @@ class GraphicalLassoCV(BaseTransformer):
|
|
1221
1244
|
if self._sklearn_object._estimator_type == 'classifier':
|
1222
1245
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1223
1246
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1224
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1247
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1248
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1225
1249
|
# For regressor, the type of predict is float64
|
1226
1250
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1227
1251
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1228
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1229
|
-
|
1252
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1253
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1230
1254
|
for prob_func in PROB_FUNCTIONS:
|
1231
1255
|
if hasattr(self, prob_func):
|
1232
1256
|
output_cols_prefix: str = f"{prob_func}_"
|
1233
1257
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1234
1258
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1235
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1259
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1260
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1236
1261
|
|
1237
1262
|
@property
|
1238
1263
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|