snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -282,7 +284,6 @@ class DictionaryLearning(BaseTransformer):
|
|
282
284
|
sample_weight_col: Optional[str] = None,
|
283
285
|
) -> None:
|
284
286
|
super().__init__()
|
285
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
286
287
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
287
288
|
|
288
289
|
self._deps = list(deps)
|
@@ -318,6 +319,15 @@ class DictionaryLearning(BaseTransformer):
|
|
318
319
|
self.set_drop_input_cols(drop_input_cols)
|
319
320
|
self.set_sample_weight_col(sample_weight_col)
|
320
321
|
|
322
|
+
def _get_rand_id(self) -> str:
|
323
|
+
"""
|
324
|
+
Generate random id to be used in sproc and stage names.
|
325
|
+
|
326
|
+
Returns:
|
327
|
+
Random id string usable in sproc, table, and stage names.
|
328
|
+
"""
|
329
|
+
return str(uuid4()).replace("-", "_").upper()
|
330
|
+
|
321
331
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
322
332
|
"""
|
323
333
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -396,7 +406,7 @@ class DictionaryLearning(BaseTransformer):
|
|
396
406
|
cp.dump(self._sklearn_object, local_transform_file)
|
397
407
|
|
398
408
|
# Create temp stage to run fit.
|
399
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
409
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
400
410
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
401
411
|
SqlResultValidator(
|
402
412
|
session=session,
|
@@ -409,11 +419,12 @@ class DictionaryLearning(BaseTransformer):
|
|
409
419
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
410
420
|
).validate()
|
411
421
|
|
412
|
-
|
422
|
+
# Use posixpath to construct stage paths
|
423
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
424
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
413
425
|
local_result_file_name = get_temp_file_path()
|
414
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
415
426
|
|
416
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
427
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
417
428
|
statement_params = telemetry.get_function_usage_statement_params(
|
418
429
|
project=_PROJECT,
|
419
430
|
subproject=_SUBPROJECT,
|
@@ -439,6 +450,7 @@ class DictionaryLearning(BaseTransformer):
|
|
439
450
|
replace=True,
|
440
451
|
session=session,
|
441
452
|
statement_params=statement_params,
|
453
|
+
anonymous=True
|
442
454
|
)
|
443
455
|
def fit_wrapper_sproc(
|
444
456
|
session: Session,
|
@@ -447,7 +459,8 @@ class DictionaryLearning(BaseTransformer):
|
|
447
459
|
stage_result_file_name: str,
|
448
460
|
input_cols: List[str],
|
449
461
|
label_cols: List[str],
|
450
|
-
sample_weight_col: Optional[str]
|
462
|
+
sample_weight_col: Optional[str],
|
463
|
+
statement_params: Dict[str, str]
|
451
464
|
) -> str:
|
452
465
|
import cloudpickle as cp
|
453
466
|
import numpy as np
|
@@ -514,15 +527,15 @@ class DictionaryLearning(BaseTransformer):
|
|
514
527
|
api_calls=[Session.call],
|
515
528
|
custom_tags=dict([("autogen", True)]),
|
516
529
|
)
|
517
|
-
sproc_export_file_name =
|
518
|
-
|
530
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
531
|
+
session,
|
519
532
|
query,
|
520
533
|
stage_transform_file_name,
|
521
534
|
stage_result_file_name,
|
522
535
|
identifier.get_unescaped_names(self.input_cols),
|
523
536
|
identifier.get_unescaped_names(self.label_cols),
|
524
537
|
identifier.get_unescaped_names(self.sample_weight_col),
|
525
|
-
statement_params
|
538
|
+
statement_params,
|
526
539
|
)
|
527
540
|
|
528
541
|
if "|" in sproc_export_file_name:
|
@@ -532,7 +545,7 @@ class DictionaryLearning(BaseTransformer):
|
|
532
545
|
print("\n".join(fields[1:]))
|
533
546
|
|
534
547
|
session.file.get(
|
535
|
-
|
548
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
536
549
|
local_result_file_name,
|
537
550
|
statement_params=statement_params
|
538
551
|
)
|
@@ -578,7 +591,7 @@ class DictionaryLearning(BaseTransformer):
|
|
578
591
|
|
579
592
|
# Register vectorized UDF for batch inference
|
580
593
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
581
|
-
safe_id=self.
|
594
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
582
595
|
|
583
596
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
584
597
|
# will try to pickle all of self which fails.
|
@@ -670,7 +683,7 @@ class DictionaryLearning(BaseTransformer):
|
|
670
683
|
return transformed_pandas_df.to_dict("records")
|
671
684
|
|
672
685
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
673
|
-
safe_id=self.
|
686
|
+
safe_id=self._get_rand_id()
|
674
687
|
)
|
675
688
|
|
676
689
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -835,11 +848,18 @@ class DictionaryLearning(BaseTransformer):
|
|
835
848
|
Transformed dataset.
|
836
849
|
"""
|
837
850
|
if isinstance(dataset, DataFrame):
|
851
|
+
expected_type_inferred = ""
|
852
|
+
# when it is classifier, infer the datatype from label columns
|
853
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
854
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
855
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
856
|
+
)
|
857
|
+
|
838
858
|
output_df = self._batch_inference(
|
839
859
|
dataset=dataset,
|
840
860
|
inference_method="predict",
|
841
861
|
expected_output_cols_list=self.output_cols,
|
842
|
-
expected_output_cols_type=
|
862
|
+
expected_output_cols_type=expected_type_inferred,
|
843
863
|
)
|
844
864
|
elif isinstance(dataset, pd.DataFrame):
|
845
865
|
output_df = self._sklearn_inference(
|
@@ -912,10 +932,10 @@ class DictionaryLearning(BaseTransformer):
|
|
912
932
|
|
913
933
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
914
934
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
915
|
-
Returns
|
935
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
916
936
|
"""
|
917
937
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
918
|
-
return []
|
938
|
+
return [output_cols_prefix]
|
919
939
|
|
920
940
|
classes = self._sklearn_object.classes_
|
921
941
|
if isinstance(classes, numpy.ndarray):
|
@@ -1140,7 +1160,7 @@ class DictionaryLearning(BaseTransformer):
|
|
1140
1160
|
cp.dump(self._sklearn_object, local_score_file)
|
1141
1161
|
|
1142
1162
|
# Create temp stage to run score.
|
1143
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1163
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1144
1164
|
session = dataset._session
|
1145
1165
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1146
1166
|
SqlResultValidator(
|
@@ -1154,8 +1174,9 @@ class DictionaryLearning(BaseTransformer):
|
|
1154
1174
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1155
1175
|
).validate()
|
1156
1176
|
|
1157
|
-
|
1158
|
-
|
1177
|
+
# Use posixpath to construct stage paths
|
1178
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1179
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1159
1180
|
statement_params = telemetry.get_function_usage_statement_params(
|
1160
1181
|
project=_PROJECT,
|
1161
1182
|
subproject=_SUBPROJECT,
|
@@ -1181,6 +1202,7 @@ class DictionaryLearning(BaseTransformer):
|
|
1181
1202
|
replace=True,
|
1182
1203
|
session=session,
|
1183
1204
|
statement_params=statement_params,
|
1205
|
+
anonymous=True
|
1184
1206
|
)
|
1185
1207
|
def score_wrapper_sproc(
|
1186
1208
|
session: Session,
|
@@ -1188,7 +1210,8 @@ class DictionaryLearning(BaseTransformer):
|
|
1188
1210
|
stage_score_file_name: str,
|
1189
1211
|
input_cols: List[str],
|
1190
1212
|
label_cols: List[str],
|
1191
|
-
sample_weight_col: Optional[str]
|
1213
|
+
sample_weight_col: Optional[str],
|
1214
|
+
statement_params: Dict[str, str]
|
1192
1215
|
) -> float:
|
1193
1216
|
import cloudpickle as cp
|
1194
1217
|
import numpy as np
|
@@ -1238,14 +1261,14 @@ class DictionaryLearning(BaseTransformer):
|
|
1238
1261
|
api_calls=[Session.call],
|
1239
1262
|
custom_tags=dict([("autogen", True)]),
|
1240
1263
|
)
|
1241
|
-
score =
|
1242
|
-
|
1264
|
+
score = score_wrapper_sproc(
|
1265
|
+
session,
|
1243
1266
|
query,
|
1244
1267
|
stage_score_file_name,
|
1245
1268
|
identifier.get_unescaped_names(self.input_cols),
|
1246
1269
|
identifier.get_unescaped_names(self.label_cols),
|
1247
1270
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1248
|
-
statement_params
|
1271
|
+
statement_params,
|
1249
1272
|
)
|
1250
1273
|
|
1251
1274
|
cleanup_temp_files([local_score_file_name])
|
@@ -1263,18 +1286,20 @@ class DictionaryLearning(BaseTransformer):
|
|
1263
1286
|
if self._sklearn_object._estimator_type == 'classifier':
|
1264
1287
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1265
1288
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1266
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1289
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1290
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1267
1291
|
# For regressor, the type of predict is float64
|
1268
1292
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1269
1293
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1270
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1271
|
-
|
1294
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1295
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1272
1296
|
for prob_func in PROB_FUNCTIONS:
|
1273
1297
|
if hasattr(self, prob_func):
|
1274
1298
|
output_cols_prefix: str = f"{prob_func}_"
|
1275
1299
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1276
1300
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1277
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1301
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1302
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1278
1303
|
|
1279
1304
|
@property
|
1280
1305
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -232,7 +234,6 @@ class FactorAnalysis(BaseTransformer):
|
|
232
234
|
sample_weight_col: Optional[str] = None,
|
233
235
|
) -> None:
|
234
236
|
super().__init__()
|
235
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
236
237
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
237
238
|
|
238
239
|
self._deps = list(deps)
|
@@ -260,6 +261,15 @@ class FactorAnalysis(BaseTransformer):
|
|
260
261
|
self.set_drop_input_cols(drop_input_cols)
|
261
262
|
self.set_sample_weight_col(sample_weight_col)
|
262
263
|
|
264
|
+
def _get_rand_id(self) -> str:
|
265
|
+
"""
|
266
|
+
Generate random id to be used in sproc and stage names.
|
267
|
+
|
268
|
+
Returns:
|
269
|
+
Random id string usable in sproc, table, and stage names.
|
270
|
+
"""
|
271
|
+
return str(uuid4()).replace("-", "_").upper()
|
272
|
+
|
263
273
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
264
274
|
"""
|
265
275
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -338,7 +348,7 @@ class FactorAnalysis(BaseTransformer):
|
|
338
348
|
cp.dump(self._sklearn_object, local_transform_file)
|
339
349
|
|
340
350
|
# Create temp stage to run fit.
|
341
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
351
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
342
352
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
343
353
|
SqlResultValidator(
|
344
354
|
session=session,
|
@@ -351,11 +361,12 @@ class FactorAnalysis(BaseTransformer):
|
|
351
361
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
352
362
|
).validate()
|
353
363
|
|
354
|
-
|
364
|
+
# Use posixpath to construct stage paths
|
365
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
366
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
355
367
|
local_result_file_name = get_temp_file_path()
|
356
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
357
368
|
|
358
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
369
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
359
370
|
statement_params = telemetry.get_function_usage_statement_params(
|
360
371
|
project=_PROJECT,
|
361
372
|
subproject=_SUBPROJECT,
|
@@ -381,6 +392,7 @@ class FactorAnalysis(BaseTransformer):
|
|
381
392
|
replace=True,
|
382
393
|
session=session,
|
383
394
|
statement_params=statement_params,
|
395
|
+
anonymous=True
|
384
396
|
)
|
385
397
|
def fit_wrapper_sproc(
|
386
398
|
session: Session,
|
@@ -389,7 +401,8 @@ class FactorAnalysis(BaseTransformer):
|
|
389
401
|
stage_result_file_name: str,
|
390
402
|
input_cols: List[str],
|
391
403
|
label_cols: List[str],
|
392
|
-
sample_weight_col: Optional[str]
|
404
|
+
sample_weight_col: Optional[str],
|
405
|
+
statement_params: Dict[str, str]
|
393
406
|
) -> str:
|
394
407
|
import cloudpickle as cp
|
395
408
|
import numpy as np
|
@@ -456,15 +469,15 @@ class FactorAnalysis(BaseTransformer):
|
|
456
469
|
api_calls=[Session.call],
|
457
470
|
custom_tags=dict([("autogen", True)]),
|
458
471
|
)
|
459
|
-
sproc_export_file_name =
|
460
|
-
|
472
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
473
|
+
session,
|
461
474
|
query,
|
462
475
|
stage_transform_file_name,
|
463
476
|
stage_result_file_name,
|
464
477
|
identifier.get_unescaped_names(self.input_cols),
|
465
478
|
identifier.get_unescaped_names(self.label_cols),
|
466
479
|
identifier.get_unescaped_names(self.sample_weight_col),
|
467
|
-
statement_params
|
480
|
+
statement_params,
|
468
481
|
)
|
469
482
|
|
470
483
|
if "|" in sproc_export_file_name:
|
@@ -474,7 +487,7 @@ class FactorAnalysis(BaseTransformer):
|
|
474
487
|
print("\n".join(fields[1:]))
|
475
488
|
|
476
489
|
session.file.get(
|
477
|
-
|
490
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
478
491
|
local_result_file_name,
|
479
492
|
statement_params=statement_params
|
480
493
|
)
|
@@ -520,7 +533,7 @@ class FactorAnalysis(BaseTransformer):
|
|
520
533
|
|
521
534
|
# Register vectorized UDF for batch inference
|
522
535
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
523
|
-
safe_id=self.
|
536
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
524
537
|
|
525
538
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
526
539
|
# will try to pickle all of self which fails.
|
@@ -612,7 +625,7 @@ class FactorAnalysis(BaseTransformer):
|
|
612
625
|
return transformed_pandas_df.to_dict("records")
|
613
626
|
|
614
627
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
615
|
-
safe_id=self.
|
628
|
+
safe_id=self._get_rand_id()
|
616
629
|
)
|
617
630
|
|
618
631
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -777,11 +790,18 @@ class FactorAnalysis(BaseTransformer):
|
|
777
790
|
Transformed dataset.
|
778
791
|
"""
|
779
792
|
if isinstance(dataset, DataFrame):
|
793
|
+
expected_type_inferred = ""
|
794
|
+
# when it is classifier, infer the datatype from label columns
|
795
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
796
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
797
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
798
|
+
)
|
799
|
+
|
780
800
|
output_df = self._batch_inference(
|
781
801
|
dataset=dataset,
|
782
802
|
inference_method="predict",
|
783
803
|
expected_output_cols_list=self.output_cols,
|
784
|
-
expected_output_cols_type=
|
804
|
+
expected_output_cols_type=expected_type_inferred,
|
785
805
|
)
|
786
806
|
elif isinstance(dataset, pd.DataFrame):
|
787
807
|
output_df = self._sklearn_inference(
|
@@ -854,10 +874,10 @@ class FactorAnalysis(BaseTransformer):
|
|
854
874
|
|
855
875
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
856
876
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
857
|
-
Returns
|
877
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
858
878
|
"""
|
859
879
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
860
|
-
return []
|
880
|
+
return [output_cols_prefix]
|
861
881
|
|
862
882
|
classes = self._sklearn_object.classes_
|
863
883
|
if isinstance(classes, numpy.ndarray):
|
@@ -1082,7 +1102,7 @@ class FactorAnalysis(BaseTransformer):
|
|
1082
1102
|
cp.dump(self._sklearn_object, local_score_file)
|
1083
1103
|
|
1084
1104
|
# Create temp stage to run score.
|
1085
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1105
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1086
1106
|
session = dataset._session
|
1087
1107
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1088
1108
|
SqlResultValidator(
|
@@ -1096,8 +1116,9 @@ class FactorAnalysis(BaseTransformer):
|
|
1096
1116
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1097
1117
|
).validate()
|
1098
1118
|
|
1099
|
-
|
1100
|
-
|
1119
|
+
# Use posixpath to construct stage paths
|
1120
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1121
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1101
1122
|
statement_params = telemetry.get_function_usage_statement_params(
|
1102
1123
|
project=_PROJECT,
|
1103
1124
|
subproject=_SUBPROJECT,
|
@@ -1123,6 +1144,7 @@ class FactorAnalysis(BaseTransformer):
|
|
1123
1144
|
replace=True,
|
1124
1145
|
session=session,
|
1125
1146
|
statement_params=statement_params,
|
1147
|
+
anonymous=True
|
1126
1148
|
)
|
1127
1149
|
def score_wrapper_sproc(
|
1128
1150
|
session: Session,
|
@@ -1130,7 +1152,8 @@ class FactorAnalysis(BaseTransformer):
|
|
1130
1152
|
stage_score_file_name: str,
|
1131
1153
|
input_cols: List[str],
|
1132
1154
|
label_cols: List[str],
|
1133
|
-
sample_weight_col: Optional[str]
|
1155
|
+
sample_weight_col: Optional[str],
|
1156
|
+
statement_params: Dict[str, str]
|
1134
1157
|
) -> float:
|
1135
1158
|
import cloudpickle as cp
|
1136
1159
|
import numpy as np
|
@@ -1180,14 +1203,14 @@ class FactorAnalysis(BaseTransformer):
|
|
1180
1203
|
api_calls=[Session.call],
|
1181
1204
|
custom_tags=dict([("autogen", True)]),
|
1182
1205
|
)
|
1183
|
-
score =
|
1184
|
-
|
1206
|
+
score = score_wrapper_sproc(
|
1207
|
+
session,
|
1185
1208
|
query,
|
1186
1209
|
stage_score_file_name,
|
1187
1210
|
identifier.get_unescaped_names(self.input_cols),
|
1188
1211
|
identifier.get_unescaped_names(self.label_cols),
|
1189
1212
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1190
|
-
statement_params
|
1213
|
+
statement_params,
|
1191
1214
|
)
|
1192
1215
|
|
1193
1216
|
cleanup_temp_files([local_score_file_name])
|
@@ -1205,18 +1228,20 @@ class FactorAnalysis(BaseTransformer):
|
|
1205
1228
|
if self._sklearn_object._estimator_type == 'classifier':
|
1206
1229
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1207
1230
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1208
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1231
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1232
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1209
1233
|
# For regressor, the type of predict is float64
|
1210
1234
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1211
1235
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1212
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1213
|
-
|
1236
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1237
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1214
1238
|
for prob_func in PROB_FUNCTIONS:
|
1215
1239
|
if hasattr(self, prob_func):
|
1216
1240
|
output_cols_prefix: str = f"{prob_func}_"
|
1217
1241
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1218
1242
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1219
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1243
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1244
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1220
1245
|
|
1221
1246
|
@property
|
1222
1247
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|