snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -238,7 +240,6 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
238
240
|
sample_weight_col: Optional[str] = None,
|
239
241
|
) -> None:
|
240
242
|
super().__init__()
|
241
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
242
243
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
243
244
|
|
244
245
|
self._deps = list(deps)
|
@@ -264,6 +265,15 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
264
265
|
self.set_drop_input_cols(drop_input_cols)
|
265
266
|
self.set_sample_weight_col(sample_weight_col)
|
266
267
|
|
268
|
+
def _get_rand_id(self) -> str:
|
269
|
+
"""
|
270
|
+
Generate random id to be used in sproc and stage names.
|
271
|
+
|
272
|
+
Returns:
|
273
|
+
Random id string usable in sproc, table, and stage names.
|
274
|
+
"""
|
275
|
+
return str(uuid4()).replace("-", "_").upper()
|
276
|
+
|
267
277
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
268
278
|
"""
|
269
279
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -342,7 +352,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
342
352
|
cp.dump(self._sklearn_object, local_transform_file)
|
343
353
|
|
344
354
|
# Create temp stage to run fit.
|
345
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
355
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
346
356
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
347
357
|
SqlResultValidator(
|
348
358
|
session=session,
|
@@ -355,11 +365,12 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
355
365
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
356
366
|
).validate()
|
357
367
|
|
358
|
-
|
368
|
+
# Use posixpath to construct stage paths
|
369
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
370
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
359
371
|
local_result_file_name = get_temp_file_path()
|
360
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
361
372
|
|
362
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
373
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
363
374
|
statement_params = telemetry.get_function_usage_statement_params(
|
364
375
|
project=_PROJECT,
|
365
376
|
subproject=_SUBPROJECT,
|
@@ -385,6 +396,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
385
396
|
replace=True,
|
386
397
|
session=session,
|
387
398
|
statement_params=statement_params,
|
399
|
+
anonymous=True
|
388
400
|
)
|
389
401
|
def fit_wrapper_sproc(
|
390
402
|
session: Session,
|
@@ -393,7 +405,8 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
393
405
|
stage_result_file_name: str,
|
394
406
|
input_cols: List[str],
|
395
407
|
label_cols: List[str],
|
396
|
-
sample_weight_col: Optional[str]
|
408
|
+
sample_weight_col: Optional[str],
|
409
|
+
statement_params: Dict[str, str]
|
397
410
|
) -> str:
|
398
411
|
import cloudpickle as cp
|
399
412
|
import numpy as np
|
@@ -460,15 +473,15 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
460
473
|
api_calls=[Session.call],
|
461
474
|
custom_tags=dict([("autogen", True)]),
|
462
475
|
)
|
463
|
-
sproc_export_file_name =
|
464
|
-
|
476
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
477
|
+
session,
|
465
478
|
query,
|
466
479
|
stage_transform_file_name,
|
467
480
|
stage_result_file_name,
|
468
481
|
identifier.get_unescaped_names(self.input_cols),
|
469
482
|
identifier.get_unescaped_names(self.label_cols),
|
470
483
|
identifier.get_unescaped_names(self.sample_weight_col),
|
471
|
-
statement_params
|
484
|
+
statement_params,
|
472
485
|
)
|
473
486
|
|
474
487
|
if "|" in sproc_export_file_name:
|
@@ -478,7 +491,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
478
491
|
print("\n".join(fields[1:]))
|
479
492
|
|
480
493
|
session.file.get(
|
481
|
-
|
494
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
482
495
|
local_result_file_name,
|
483
496
|
statement_params=statement_params
|
484
497
|
)
|
@@ -524,7 +537,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
524
537
|
|
525
538
|
# Register vectorized UDF for batch inference
|
526
539
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
527
|
-
safe_id=self.id, method=inference_method)
|
540
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
528
541
|
|
529
542
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
530
543
|
# will try to pickle all of self which fails.
|
@@ -616,7 +629,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
616
629
|
return transformed_pandas_df.to_dict("records")
|
617
630
|
|
618
631
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
619
|
-
safe_id=self.id
|
632
|
+
safe_id=self._get_rand_id()
|
620
633
|
)
|
621
634
|
|
622
635
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -783,11 +796,18 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
783
796
|
Transformed dataset.
|
784
797
|
"""
|
785
798
|
if isinstance(dataset, DataFrame):
|
799
|
+
expected_type_inferred = ""
|
800
|
+
# when it is classifier, infer the datatype from label columns
|
801
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
802
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
803
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
804
|
+
)
|
805
|
+
|
786
806
|
output_df = self._batch_inference(
|
787
807
|
dataset=dataset,
|
788
808
|
inference_method="predict",
|
789
809
|
expected_output_cols_list=self.output_cols,
|
790
|
-
expected_output_cols_type=
|
810
|
+
expected_output_cols_type=expected_type_inferred,
|
791
811
|
)
|
792
812
|
elif isinstance(dataset, pd.DataFrame):
|
793
813
|
output_df = self._sklearn_inference(
|
@@ -860,10 +880,10 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
860
880
|
|
861
881
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
862
882
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
863
|
-
Returns
|
883
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
864
884
|
"""
|
865
885
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
866
|
-
return []
|
886
|
+
return [output_cols_prefix]
|
867
887
|
|
868
888
|
classes = self._sklearn_object.classes_
|
869
889
|
if isinstance(classes, numpy.ndarray):
|
@@ -1094,7 +1114,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
1094
1114
|
cp.dump(self._sklearn_object, local_score_file)
|
1095
1115
|
|
1096
1116
|
# Create temp stage to run score.
|
1097
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1117
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1098
1118
|
session = dataset._session
|
1099
1119
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1100
1120
|
SqlResultValidator(
|
@@ -1108,8 +1128,9 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
1108
1128
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1109
1129
|
).validate()
|
1110
1130
|
|
1111
|
-
|
1112
|
-
|
1131
|
+
# Use posixpath to construct stage paths
|
1132
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1133
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1113
1134
|
statement_params = telemetry.get_function_usage_statement_params(
|
1114
1135
|
project=_PROJECT,
|
1115
1136
|
subproject=_SUBPROJECT,
|
@@ -1135,6 +1156,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
1135
1156
|
replace=True,
|
1136
1157
|
session=session,
|
1137
1158
|
statement_params=statement_params,
|
1159
|
+
anonymous=True
|
1138
1160
|
)
|
1139
1161
|
def score_wrapper_sproc(
|
1140
1162
|
session: Session,
|
@@ -1142,7 +1164,8 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
1142
1164
|
stage_score_file_name: str,
|
1143
1165
|
input_cols: List[str],
|
1144
1166
|
label_cols: List[str],
|
1145
|
-
sample_weight_col: Optional[str]
|
1167
|
+
sample_weight_col: Optional[str],
|
1168
|
+
statement_params: Dict[str, str]
|
1146
1169
|
) -> float:
|
1147
1170
|
import cloudpickle as cp
|
1148
1171
|
import numpy as np
|
@@ -1192,14 +1215,14 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
1192
1215
|
api_calls=[Session.call],
|
1193
1216
|
custom_tags=dict([("autogen", True)]),
|
1194
1217
|
)
|
1195
|
-
score =
|
1196
|
-
|
1218
|
+
score = score_wrapper_sproc(
|
1219
|
+
session,
|
1197
1220
|
query,
|
1198
1221
|
stage_score_file_name,
|
1199
1222
|
identifier.get_unescaped_names(self.input_cols),
|
1200
1223
|
identifier.get_unescaped_names(self.label_cols),
|
1201
1224
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1202
|
-
statement_params
|
1225
|
+
statement_params,
|
1203
1226
|
)
|
1204
1227
|
|
1205
1228
|
cleanup_temp_files([local_score_file_name])
|
@@ -1217,18 +1240,20 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
1217
1240
|
if self._sklearn_object._estimator_type == 'classifier':
|
1218
1241
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1219
1242
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1220
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1243
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1244
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1221
1245
|
# For regressor, the type of predict is float64
|
1222
1246
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1223
1247
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1224
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1225
|
-
|
1248
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1249
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1226
1250
|
for prob_func in PROB_FUNCTIONS:
|
1227
1251
|
if hasattr(self, prob_func):
|
1228
1252
|
output_cols_prefix: str = f"{prob_func}_"
|
1229
1253
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1230
1254
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1231
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1255
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1256
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1232
1257
|
|
1233
1258
|
@property
|
1234
1259
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -203,7 +205,6 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
203
205
|
sample_weight_col: Optional[str] = None,
|
204
206
|
) -> None:
|
205
207
|
super().__init__()
|
206
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
207
208
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
208
209
|
|
209
210
|
self._deps = list(deps)
|
@@ -226,6 +227,15 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
226
227
|
self.set_drop_input_cols(drop_input_cols)
|
227
228
|
self.set_sample_weight_col(sample_weight_col)
|
228
229
|
|
230
|
+
def _get_rand_id(self) -> str:
|
231
|
+
"""
|
232
|
+
Generate random id to be used in sproc and stage names.
|
233
|
+
|
234
|
+
Returns:
|
235
|
+
Random id string usable in sproc, table, and stage names.
|
236
|
+
"""
|
237
|
+
return str(uuid4()).replace("-", "_").upper()
|
238
|
+
|
229
239
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
230
240
|
"""
|
231
241
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -304,7 +314,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
304
314
|
cp.dump(self._sklearn_object, local_transform_file)
|
305
315
|
|
306
316
|
# Create temp stage to run fit.
|
307
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
317
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
308
318
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
309
319
|
SqlResultValidator(
|
310
320
|
session=session,
|
@@ -317,11 +327,12 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
317
327
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
318
328
|
).validate()
|
319
329
|
|
320
|
-
|
330
|
+
# Use posixpath to construct stage paths
|
331
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
332
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
321
333
|
local_result_file_name = get_temp_file_path()
|
322
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
323
334
|
|
324
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
335
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
325
336
|
statement_params = telemetry.get_function_usage_statement_params(
|
326
337
|
project=_PROJECT,
|
327
338
|
subproject=_SUBPROJECT,
|
@@ -347,6 +358,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
347
358
|
replace=True,
|
348
359
|
session=session,
|
349
360
|
statement_params=statement_params,
|
361
|
+
anonymous=True
|
350
362
|
)
|
351
363
|
def fit_wrapper_sproc(
|
352
364
|
session: Session,
|
@@ -355,7 +367,8 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
355
367
|
stage_result_file_name: str,
|
356
368
|
input_cols: List[str],
|
357
369
|
label_cols: List[str],
|
358
|
-
sample_weight_col: Optional[str]
|
370
|
+
sample_weight_col: Optional[str],
|
371
|
+
statement_params: Dict[str, str]
|
359
372
|
) -> str:
|
360
373
|
import cloudpickle as cp
|
361
374
|
import numpy as np
|
@@ -422,15 +435,15 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
422
435
|
api_calls=[Session.call],
|
423
436
|
custom_tags=dict([("autogen", True)]),
|
424
437
|
)
|
425
|
-
sproc_export_file_name =
|
426
|
-
|
438
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
439
|
+
session,
|
427
440
|
query,
|
428
441
|
stage_transform_file_name,
|
429
442
|
stage_result_file_name,
|
430
443
|
identifier.get_unescaped_names(self.input_cols),
|
431
444
|
identifier.get_unescaped_names(self.label_cols),
|
432
445
|
identifier.get_unescaped_names(self.sample_weight_col),
|
433
|
-
statement_params
|
446
|
+
statement_params,
|
434
447
|
)
|
435
448
|
|
436
449
|
if "|" in sproc_export_file_name:
|
@@ -440,7 +453,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
440
453
|
print("\n".join(fields[1:]))
|
441
454
|
|
442
455
|
session.file.get(
|
443
|
-
|
456
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
444
457
|
local_result_file_name,
|
445
458
|
statement_params=statement_params
|
446
459
|
)
|
@@ -486,7 +499,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
486
499
|
|
487
500
|
# Register vectorized UDF for batch inference
|
488
501
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
489
|
-
safe_id=self.id, method=inference_method)
|
502
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
490
503
|
|
491
504
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
492
505
|
# will try to pickle all of self which fails.
|
@@ -578,7 +591,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
578
591
|
return transformed_pandas_df.to_dict("records")
|
579
592
|
|
580
593
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
581
|
-
safe_id=self.id
|
594
|
+
safe_id=self._get_rand_id()
|
582
595
|
)
|
583
596
|
|
584
597
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -745,11 +758,18 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
745
758
|
Transformed dataset.
|
746
759
|
"""
|
747
760
|
if isinstance(dataset, DataFrame):
|
761
|
+
expected_type_inferred = ""
|
762
|
+
# when it is classifier, infer the datatype from label columns
|
763
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
764
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
765
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
766
|
+
)
|
767
|
+
|
748
768
|
output_df = self._batch_inference(
|
749
769
|
dataset=dataset,
|
750
770
|
inference_method="predict",
|
751
771
|
expected_output_cols_list=self.output_cols,
|
752
|
-
expected_output_cols_type=
|
772
|
+
expected_output_cols_type=expected_type_inferred,
|
753
773
|
)
|
754
774
|
elif isinstance(dataset, pd.DataFrame):
|
755
775
|
output_df = self._sklearn_inference(
|
@@ -820,10 +840,10 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
820
840
|
|
821
841
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
822
842
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
823
|
-
Returns
|
843
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
824
844
|
"""
|
825
845
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
826
|
-
return []
|
846
|
+
return [output_cols_prefix]
|
827
847
|
|
828
848
|
classes = self._sklearn_object.classes_
|
829
849
|
if isinstance(classes, numpy.ndarray):
|
@@ -1054,7 +1074,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
1054
1074
|
cp.dump(self._sklearn_object, local_score_file)
|
1055
1075
|
|
1056
1076
|
# Create temp stage to run score.
|
1057
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1077
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1058
1078
|
session = dataset._session
|
1059
1079
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1060
1080
|
SqlResultValidator(
|
@@ -1068,8 +1088,9 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
1068
1088
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1069
1089
|
).validate()
|
1070
1090
|
|
1071
|
-
|
1072
|
-
|
1091
|
+
# Use posixpath to construct stage paths
|
1092
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1093
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1073
1094
|
statement_params = telemetry.get_function_usage_statement_params(
|
1074
1095
|
project=_PROJECT,
|
1075
1096
|
subproject=_SUBPROJECT,
|
@@ -1095,6 +1116,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
1095
1116
|
replace=True,
|
1096
1117
|
session=session,
|
1097
1118
|
statement_params=statement_params,
|
1119
|
+
anonymous=True
|
1098
1120
|
)
|
1099
1121
|
def score_wrapper_sproc(
|
1100
1122
|
session: Session,
|
@@ -1102,7 +1124,8 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
1102
1124
|
stage_score_file_name: str,
|
1103
1125
|
input_cols: List[str],
|
1104
1126
|
label_cols: List[str],
|
1105
|
-
sample_weight_col: Optional[str]
|
1127
|
+
sample_weight_col: Optional[str],
|
1128
|
+
statement_params: Dict[str, str]
|
1106
1129
|
) -> float:
|
1107
1130
|
import cloudpickle as cp
|
1108
1131
|
import numpy as np
|
@@ -1152,14 +1175,14 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
1152
1175
|
api_calls=[Session.call],
|
1153
1176
|
custom_tags=dict([("autogen", True)]),
|
1154
1177
|
)
|
1155
|
-
score =
|
1156
|
-
|
1178
|
+
score = score_wrapper_sproc(
|
1179
|
+
session,
|
1157
1180
|
query,
|
1158
1181
|
stage_score_file_name,
|
1159
1182
|
identifier.get_unescaped_names(self.input_cols),
|
1160
1183
|
identifier.get_unescaped_names(self.label_cols),
|
1161
1184
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1162
|
-
statement_params
|
1185
|
+
statement_params,
|
1163
1186
|
)
|
1164
1187
|
|
1165
1188
|
cleanup_temp_files([local_score_file_name])
|
@@ -1177,18 +1200,20 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
1177
1200
|
if self._sklearn_object._estimator_type == 'classifier':
|
1178
1201
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1179
1202
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1180
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1203
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1204
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1181
1205
|
# For regressor, the type of predict is float64
|
1182
1206
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1183
1207
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1184
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1185
|
-
|
1208
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1209
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1186
1210
|
for prob_func in PROB_FUNCTIONS:
|
1187
1211
|
if hasattr(self, prob_func):
|
1188
1212
|
output_cols_prefix: str = f"{prob_func}_"
|
1189
1213
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1190
1214
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1191
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1215
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1216
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1192
1217
|
|
1193
1218
|
@property
|
1194
1219
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|