snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -249,7 +251,6 @@ class FastICA(BaseTransformer):
|
|
249
251
|
sample_weight_col: Optional[str] = None,
|
250
252
|
) -> None:
|
251
253
|
super().__init__()
|
252
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
253
254
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
254
255
|
|
255
256
|
self._deps = list(deps)
|
@@ -278,6 +279,15 @@ class FastICA(BaseTransformer):
|
|
278
279
|
self.set_drop_input_cols(drop_input_cols)
|
279
280
|
self.set_sample_weight_col(sample_weight_col)
|
280
281
|
|
282
|
+
def _get_rand_id(self) -> str:
|
283
|
+
"""
|
284
|
+
Generate random id to be used in sproc and stage names.
|
285
|
+
|
286
|
+
Returns:
|
287
|
+
Random id string usable in sproc, table, and stage names.
|
288
|
+
"""
|
289
|
+
return str(uuid4()).replace("-", "_").upper()
|
290
|
+
|
281
291
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
282
292
|
"""
|
283
293
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -356,7 +366,7 @@ class FastICA(BaseTransformer):
|
|
356
366
|
cp.dump(self._sklearn_object, local_transform_file)
|
357
367
|
|
358
368
|
# Create temp stage to run fit.
|
359
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
369
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
360
370
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
361
371
|
SqlResultValidator(
|
362
372
|
session=session,
|
@@ -369,11 +379,12 @@ class FastICA(BaseTransformer):
|
|
369
379
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
370
380
|
).validate()
|
371
381
|
|
372
|
-
|
382
|
+
# Use posixpath to construct stage paths
|
383
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
384
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
373
385
|
local_result_file_name = get_temp_file_path()
|
374
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
375
386
|
|
376
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
387
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
377
388
|
statement_params = telemetry.get_function_usage_statement_params(
|
378
389
|
project=_PROJECT,
|
379
390
|
subproject=_SUBPROJECT,
|
@@ -399,6 +410,7 @@ class FastICA(BaseTransformer):
|
|
399
410
|
replace=True,
|
400
411
|
session=session,
|
401
412
|
statement_params=statement_params,
|
413
|
+
anonymous=True
|
402
414
|
)
|
403
415
|
def fit_wrapper_sproc(
|
404
416
|
session: Session,
|
@@ -407,7 +419,8 @@ class FastICA(BaseTransformer):
|
|
407
419
|
stage_result_file_name: str,
|
408
420
|
input_cols: List[str],
|
409
421
|
label_cols: List[str],
|
410
|
-
sample_weight_col: Optional[str]
|
422
|
+
sample_weight_col: Optional[str],
|
423
|
+
statement_params: Dict[str, str]
|
411
424
|
) -> str:
|
412
425
|
import cloudpickle as cp
|
413
426
|
import numpy as np
|
@@ -474,15 +487,15 @@ class FastICA(BaseTransformer):
|
|
474
487
|
api_calls=[Session.call],
|
475
488
|
custom_tags=dict([("autogen", True)]),
|
476
489
|
)
|
477
|
-
sproc_export_file_name =
|
478
|
-
|
490
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
491
|
+
session,
|
479
492
|
query,
|
480
493
|
stage_transform_file_name,
|
481
494
|
stage_result_file_name,
|
482
495
|
identifier.get_unescaped_names(self.input_cols),
|
483
496
|
identifier.get_unescaped_names(self.label_cols),
|
484
497
|
identifier.get_unescaped_names(self.sample_weight_col),
|
485
|
-
statement_params
|
498
|
+
statement_params,
|
486
499
|
)
|
487
500
|
|
488
501
|
if "|" in sproc_export_file_name:
|
@@ -492,7 +505,7 @@ class FastICA(BaseTransformer):
|
|
492
505
|
print("\n".join(fields[1:]))
|
493
506
|
|
494
507
|
session.file.get(
|
495
|
-
|
508
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
496
509
|
local_result_file_name,
|
497
510
|
statement_params=statement_params
|
498
511
|
)
|
@@ -538,7 +551,7 @@ class FastICA(BaseTransformer):
|
|
538
551
|
|
539
552
|
# Register vectorized UDF for batch inference
|
540
553
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
541
|
-
safe_id=self.
|
554
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
542
555
|
|
543
556
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
544
557
|
# will try to pickle all of self which fails.
|
@@ -630,7 +643,7 @@ class FastICA(BaseTransformer):
|
|
630
643
|
return transformed_pandas_df.to_dict("records")
|
631
644
|
|
632
645
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
633
|
-
safe_id=self.
|
646
|
+
safe_id=self._get_rand_id()
|
634
647
|
)
|
635
648
|
|
636
649
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -795,11 +808,18 @@ class FastICA(BaseTransformer):
|
|
795
808
|
Transformed dataset.
|
796
809
|
"""
|
797
810
|
if isinstance(dataset, DataFrame):
|
811
|
+
expected_type_inferred = ""
|
812
|
+
# when it is classifier, infer the datatype from label columns
|
813
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
814
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
815
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
816
|
+
)
|
817
|
+
|
798
818
|
output_df = self._batch_inference(
|
799
819
|
dataset=dataset,
|
800
820
|
inference_method="predict",
|
801
821
|
expected_output_cols_list=self.output_cols,
|
802
|
-
expected_output_cols_type=
|
822
|
+
expected_output_cols_type=expected_type_inferred,
|
803
823
|
)
|
804
824
|
elif isinstance(dataset, pd.DataFrame):
|
805
825
|
output_df = self._sklearn_inference(
|
@@ -872,10 +892,10 @@ class FastICA(BaseTransformer):
|
|
872
892
|
|
873
893
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
874
894
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
875
|
-
Returns
|
895
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
876
896
|
"""
|
877
897
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
878
|
-
return []
|
898
|
+
return [output_cols_prefix]
|
879
899
|
|
880
900
|
classes = self._sklearn_object.classes_
|
881
901
|
if isinstance(classes, numpy.ndarray):
|
@@ -1100,7 +1120,7 @@ class FastICA(BaseTransformer):
|
|
1100
1120
|
cp.dump(self._sklearn_object, local_score_file)
|
1101
1121
|
|
1102
1122
|
# Create temp stage to run score.
|
1103
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1123
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1104
1124
|
session = dataset._session
|
1105
1125
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1106
1126
|
SqlResultValidator(
|
@@ -1114,8 +1134,9 @@ class FastICA(BaseTransformer):
|
|
1114
1134
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1115
1135
|
).validate()
|
1116
1136
|
|
1117
|
-
|
1118
|
-
|
1137
|
+
# Use posixpath to construct stage paths
|
1138
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1139
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1119
1140
|
statement_params = telemetry.get_function_usage_statement_params(
|
1120
1141
|
project=_PROJECT,
|
1121
1142
|
subproject=_SUBPROJECT,
|
@@ -1141,6 +1162,7 @@ class FastICA(BaseTransformer):
|
|
1141
1162
|
replace=True,
|
1142
1163
|
session=session,
|
1143
1164
|
statement_params=statement_params,
|
1165
|
+
anonymous=True
|
1144
1166
|
)
|
1145
1167
|
def score_wrapper_sproc(
|
1146
1168
|
session: Session,
|
@@ -1148,7 +1170,8 @@ class FastICA(BaseTransformer):
|
|
1148
1170
|
stage_score_file_name: str,
|
1149
1171
|
input_cols: List[str],
|
1150
1172
|
label_cols: List[str],
|
1151
|
-
sample_weight_col: Optional[str]
|
1173
|
+
sample_weight_col: Optional[str],
|
1174
|
+
statement_params: Dict[str, str]
|
1152
1175
|
) -> float:
|
1153
1176
|
import cloudpickle as cp
|
1154
1177
|
import numpy as np
|
@@ -1198,14 +1221,14 @@ class FastICA(BaseTransformer):
|
|
1198
1221
|
api_calls=[Session.call],
|
1199
1222
|
custom_tags=dict([("autogen", True)]),
|
1200
1223
|
)
|
1201
|
-
score =
|
1202
|
-
|
1224
|
+
score = score_wrapper_sproc(
|
1225
|
+
session,
|
1203
1226
|
query,
|
1204
1227
|
stage_score_file_name,
|
1205
1228
|
identifier.get_unescaped_names(self.input_cols),
|
1206
1229
|
identifier.get_unescaped_names(self.label_cols),
|
1207
1230
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1208
|
-
statement_params
|
1231
|
+
statement_params,
|
1209
1232
|
)
|
1210
1233
|
|
1211
1234
|
cleanup_temp_files([local_score_file_name])
|
@@ -1223,18 +1246,20 @@ class FastICA(BaseTransformer):
|
|
1223
1246
|
if self._sklearn_object._estimator_type == 'classifier':
|
1224
1247
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1225
1248
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1226
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1249
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1250
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1227
1251
|
# For regressor, the type of predict is float64
|
1228
1252
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1229
1253
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1230
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1231
|
-
|
1254
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1255
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1232
1256
|
for prob_func in PROB_FUNCTIONS:
|
1233
1257
|
if hasattr(self, prob_func):
|
1234
1258
|
output_cols_prefix: str = f"{prob_func}_"
|
1235
1259
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1236
1260
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1237
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1261
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1262
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1238
1263
|
|
1239
1264
|
@property
|
1240
1265
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -207,7 +209,6 @@ class IncrementalPCA(BaseTransformer):
|
|
207
209
|
sample_weight_col: Optional[str] = None,
|
208
210
|
) -> None:
|
209
211
|
super().__init__()
|
210
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
211
212
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
212
213
|
|
213
214
|
self._deps = list(deps)
|
@@ -230,6 +231,15 @@ class IncrementalPCA(BaseTransformer):
|
|
230
231
|
self.set_drop_input_cols(drop_input_cols)
|
231
232
|
self.set_sample_weight_col(sample_weight_col)
|
232
233
|
|
234
|
+
def _get_rand_id(self) -> str:
|
235
|
+
"""
|
236
|
+
Generate random id to be used in sproc and stage names.
|
237
|
+
|
238
|
+
Returns:
|
239
|
+
Random id string usable in sproc, table, and stage names.
|
240
|
+
"""
|
241
|
+
return str(uuid4()).replace("-", "_").upper()
|
242
|
+
|
233
243
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
234
244
|
"""
|
235
245
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -308,7 +318,7 @@ class IncrementalPCA(BaseTransformer):
|
|
308
318
|
cp.dump(self._sklearn_object, local_transform_file)
|
309
319
|
|
310
320
|
# Create temp stage to run fit.
|
311
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
321
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
312
322
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
313
323
|
SqlResultValidator(
|
314
324
|
session=session,
|
@@ -321,11 +331,12 @@ class IncrementalPCA(BaseTransformer):
|
|
321
331
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
322
332
|
).validate()
|
323
333
|
|
324
|
-
|
334
|
+
# Use posixpath to construct stage paths
|
335
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
336
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
325
337
|
local_result_file_name = get_temp_file_path()
|
326
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
327
338
|
|
328
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
339
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
329
340
|
statement_params = telemetry.get_function_usage_statement_params(
|
330
341
|
project=_PROJECT,
|
331
342
|
subproject=_SUBPROJECT,
|
@@ -351,6 +362,7 @@ class IncrementalPCA(BaseTransformer):
|
|
351
362
|
replace=True,
|
352
363
|
session=session,
|
353
364
|
statement_params=statement_params,
|
365
|
+
anonymous=True
|
354
366
|
)
|
355
367
|
def fit_wrapper_sproc(
|
356
368
|
session: Session,
|
@@ -359,7 +371,8 @@ class IncrementalPCA(BaseTransformer):
|
|
359
371
|
stage_result_file_name: str,
|
360
372
|
input_cols: List[str],
|
361
373
|
label_cols: List[str],
|
362
|
-
sample_weight_col: Optional[str]
|
374
|
+
sample_weight_col: Optional[str],
|
375
|
+
statement_params: Dict[str, str]
|
363
376
|
) -> str:
|
364
377
|
import cloudpickle as cp
|
365
378
|
import numpy as np
|
@@ -426,15 +439,15 @@ class IncrementalPCA(BaseTransformer):
|
|
426
439
|
api_calls=[Session.call],
|
427
440
|
custom_tags=dict([("autogen", True)]),
|
428
441
|
)
|
429
|
-
sproc_export_file_name =
|
430
|
-
|
442
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
443
|
+
session,
|
431
444
|
query,
|
432
445
|
stage_transform_file_name,
|
433
446
|
stage_result_file_name,
|
434
447
|
identifier.get_unescaped_names(self.input_cols),
|
435
448
|
identifier.get_unescaped_names(self.label_cols),
|
436
449
|
identifier.get_unescaped_names(self.sample_weight_col),
|
437
|
-
statement_params
|
450
|
+
statement_params,
|
438
451
|
)
|
439
452
|
|
440
453
|
if "|" in sproc_export_file_name:
|
@@ -444,7 +457,7 @@ class IncrementalPCA(BaseTransformer):
|
|
444
457
|
print("\n".join(fields[1:]))
|
445
458
|
|
446
459
|
session.file.get(
|
447
|
-
|
460
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
448
461
|
local_result_file_name,
|
449
462
|
statement_params=statement_params
|
450
463
|
)
|
@@ -490,7 +503,7 @@ class IncrementalPCA(BaseTransformer):
|
|
490
503
|
|
491
504
|
# Register vectorized UDF for batch inference
|
492
505
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
493
|
-
safe_id=self.
|
506
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
494
507
|
|
495
508
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
496
509
|
# will try to pickle all of self which fails.
|
@@ -582,7 +595,7 @@ class IncrementalPCA(BaseTransformer):
|
|
582
595
|
return transformed_pandas_df.to_dict("records")
|
583
596
|
|
584
597
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
585
|
-
safe_id=self.
|
598
|
+
safe_id=self._get_rand_id()
|
586
599
|
)
|
587
600
|
|
588
601
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -747,11 +760,18 @@ class IncrementalPCA(BaseTransformer):
|
|
747
760
|
Transformed dataset.
|
748
761
|
"""
|
749
762
|
if isinstance(dataset, DataFrame):
|
763
|
+
expected_type_inferred = ""
|
764
|
+
# when it is classifier, infer the datatype from label columns
|
765
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
766
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
767
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
768
|
+
)
|
769
|
+
|
750
770
|
output_df = self._batch_inference(
|
751
771
|
dataset=dataset,
|
752
772
|
inference_method="predict",
|
753
773
|
expected_output_cols_list=self.output_cols,
|
754
|
-
expected_output_cols_type=
|
774
|
+
expected_output_cols_type=expected_type_inferred,
|
755
775
|
)
|
756
776
|
elif isinstance(dataset, pd.DataFrame):
|
757
777
|
output_df = self._sklearn_inference(
|
@@ -824,10 +844,10 @@ class IncrementalPCA(BaseTransformer):
|
|
824
844
|
|
825
845
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
826
846
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
827
|
-
Returns
|
847
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
828
848
|
"""
|
829
849
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
830
|
-
return []
|
850
|
+
return [output_cols_prefix]
|
831
851
|
|
832
852
|
classes = self._sklearn_object.classes_
|
833
853
|
if isinstance(classes, numpy.ndarray):
|
@@ -1052,7 +1072,7 @@ class IncrementalPCA(BaseTransformer):
|
|
1052
1072
|
cp.dump(self._sklearn_object, local_score_file)
|
1053
1073
|
|
1054
1074
|
# Create temp stage to run score.
|
1055
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1075
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1056
1076
|
session = dataset._session
|
1057
1077
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1058
1078
|
SqlResultValidator(
|
@@ -1066,8 +1086,9 @@ class IncrementalPCA(BaseTransformer):
|
|
1066
1086
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1067
1087
|
).validate()
|
1068
1088
|
|
1069
|
-
|
1070
|
-
|
1089
|
+
# Use posixpath to construct stage paths
|
1090
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1091
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1071
1092
|
statement_params = telemetry.get_function_usage_statement_params(
|
1072
1093
|
project=_PROJECT,
|
1073
1094
|
subproject=_SUBPROJECT,
|
@@ -1093,6 +1114,7 @@ class IncrementalPCA(BaseTransformer):
|
|
1093
1114
|
replace=True,
|
1094
1115
|
session=session,
|
1095
1116
|
statement_params=statement_params,
|
1117
|
+
anonymous=True
|
1096
1118
|
)
|
1097
1119
|
def score_wrapper_sproc(
|
1098
1120
|
session: Session,
|
@@ -1100,7 +1122,8 @@ class IncrementalPCA(BaseTransformer):
|
|
1100
1122
|
stage_score_file_name: str,
|
1101
1123
|
input_cols: List[str],
|
1102
1124
|
label_cols: List[str],
|
1103
|
-
sample_weight_col: Optional[str]
|
1125
|
+
sample_weight_col: Optional[str],
|
1126
|
+
statement_params: Dict[str, str]
|
1104
1127
|
) -> float:
|
1105
1128
|
import cloudpickle as cp
|
1106
1129
|
import numpy as np
|
@@ -1150,14 +1173,14 @@ class IncrementalPCA(BaseTransformer):
|
|
1150
1173
|
api_calls=[Session.call],
|
1151
1174
|
custom_tags=dict([("autogen", True)]),
|
1152
1175
|
)
|
1153
|
-
score =
|
1154
|
-
|
1176
|
+
score = score_wrapper_sproc(
|
1177
|
+
session,
|
1155
1178
|
query,
|
1156
1179
|
stage_score_file_name,
|
1157
1180
|
identifier.get_unescaped_names(self.input_cols),
|
1158
1181
|
identifier.get_unescaped_names(self.label_cols),
|
1159
1182
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1160
|
-
statement_params
|
1183
|
+
statement_params,
|
1161
1184
|
)
|
1162
1185
|
|
1163
1186
|
cleanup_temp_files([local_score_file_name])
|
@@ -1175,18 +1198,20 @@ class IncrementalPCA(BaseTransformer):
|
|
1175
1198
|
if self._sklearn_object._estimator_type == 'classifier':
|
1176
1199
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1177
1200
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1178
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1201
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1202
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1179
1203
|
# For regressor, the type of predict is float64
|
1180
1204
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1181
1205
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1182
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1183
|
-
|
1206
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1207
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1184
1208
|
for prob_func in PROB_FUNCTIONS:
|
1185
1209
|
if hasattr(self, prob_func):
|
1186
1210
|
output_cols_prefix: str = f"{prob_func}_"
|
1187
1211
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1188
1212
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1189
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1213
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1214
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1190
1215
|
|
1191
1216
|
@property
|
1192
1217
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|