snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -352,7 +354,6 @@ class MLPClassifier(BaseTransformer):
|
|
352
354
|
sample_weight_col: Optional[str] = None,
|
353
355
|
) -> None:
|
354
356
|
super().__init__()
|
355
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
356
357
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
357
358
|
|
358
359
|
self._deps = list(deps)
|
@@ -394,6 +395,15 @@ class MLPClassifier(BaseTransformer):
|
|
394
395
|
self.set_drop_input_cols(drop_input_cols)
|
395
396
|
self.set_sample_weight_col(sample_weight_col)
|
396
397
|
|
398
|
+
def _get_rand_id(self) -> str:
|
399
|
+
"""
|
400
|
+
Generate random id to be used in sproc and stage names.
|
401
|
+
|
402
|
+
Returns:
|
403
|
+
Random id string usable in sproc, table, and stage names.
|
404
|
+
"""
|
405
|
+
return str(uuid4()).replace("-", "_").upper()
|
406
|
+
|
397
407
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
398
408
|
"""
|
399
409
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -472,7 +482,7 @@ class MLPClassifier(BaseTransformer):
|
|
472
482
|
cp.dump(self._sklearn_object, local_transform_file)
|
473
483
|
|
474
484
|
# Create temp stage to run fit.
|
475
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
485
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
476
486
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
477
487
|
SqlResultValidator(
|
478
488
|
session=session,
|
@@ -485,11 +495,12 @@ class MLPClassifier(BaseTransformer):
|
|
485
495
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
486
496
|
).validate()
|
487
497
|
|
488
|
-
|
498
|
+
# Use posixpath to construct stage paths
|
499
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
500
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
489
501
|
local_result_file_name = get_temp_file_path()
|
490
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
491
502
|
|
492
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
503
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
493
504
|
statement_params = telemetry.get_function_usage_statement_params(
|
494
505
|
project=_PROJECT,
|
495
506
|
subproject=_SUBPROJECT,
|
@@ -515,6 +526,7 @@ class MLPClassifier(BaseTransformer):
|
|
515
526
|
replace=True,
|
516
527
|
session=session,
|
517
528
|
statement_params=statement_params,
|
529
|
+
anonymous=True
|
518
530
|
)
|
519
531
|
def fit_wrapper_sproc(
|
520
532
|
session: Session,
|
@@ -523,7 +535,8 @@ class MLPClassifier(BaseTransformer):
|
|
523
535
|
stage_result_file_name: str,
|
524
536
|
input_cols: List[str],
|
525
537
|
label_cols: List[str],
|
526
|
-
sample_weight_col: Optional[str]
|
538
|
+
sample_weight_col: Optional[str],
|
539
|
+
statement_params: Dict[str, str]
|
527
540
|
) -> str:
|
528
541
|
import cloudpickle as cp
|
529
542
|
import numpy as np
|
@@ -590,15 +603,15 @@ class MLPClassifier(BaseTransformer):
|
|
590
603
|
api_calls=[Session.call],
|
591
604
|
custom_tags=dict([("autogen", True)]),
|
592
605
|
)
|
593
|
-
sproc_export_file_name =
|
594
|
-
|
606
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
607
|
+
session,
|
595
608
|
query,
|
596
609
|
stage_transform_file_name,
|
597
610
|
stage_result_file_name,
|
598
611
|
identifier.get_unescaped_names(self.input_cols),
|
599
612
|
identifier.get_unescaped_names(self.label_cols),
|
600
613
|
identifier.get_unescaped_names(self.sample_weight_col),
|
601
|
-
statement_params
|
614
|
+
statement_params,
|
602
615
|
)
|
603
616
|
|
604
617
|
if "|" in sproc_export_file_name:
|
@@ -608,7 +621,7 @@ class MLPClassifier(BaseTransformer):
|
|
608
621
|
print("\n".join(fields[1:]))
|
609
622
|
|
610
623
|
session.file.get(
|
611
|
-
|
624
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
612
625
|
local_result_file_name,
|
613
626
|
statement_params=statement_params
|
614
627
|
)
|
@@ -654,7 +667,7 @@ class MLPClassifier(BaseTransformer):
|
|
654
667
|
|
655
668
|
# Register vectorized UDF for batch inference
|
656
669
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
657
|
-
safe_id=self.
|
670
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
658
671
|
|
659
672
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
660
673
|
# will try to pickle all of self which fails.
|
@@ -746,7 +759,7 @@ class MLPClassifier(BaseTransformer):
|
|
746
759
|
return transformed_pandas_df.to_dict("records")
|
747
760
|
|
748
761
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
749
|
-
safe_id=self.
|
762
|
+
safe_id=self._get_rand_id()
|
750
763
|
)
|
751
764
|
|
752
765
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -913,11 +926,18 @@ class MLPClassifier(BaseTransformer):
|
|
913
926
|
Transformed dataset.
|
914
927
|
"""
|
915
928
|
if isinstance(dataset, DataFrame):
|
929
|
+
expected_type_inferred = ""
|
930
|
+
# when it is classifier, infer the datatype from label columns
|
931
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
932
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
933
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
934
|
+
)
|
935
|
+
|
916
936
|
output_df = self._batch_inference(
|
917
937
|
dataset=dataset,
|
918
938
|
inference_method="predict",
|
919
939
|
expected_output_cols_list=self.output_cols,
|
920
|
-
expected_output_cols_type=
|
940
|
+
expected_output_cols_type=expected_type_inferred,
|
921
941
|
)
|
922
942
|
elif isinstance(dataset, pd.DataFrame):
|
923
943
|
output_df = self._sklearn_inference(
|
@@ -988,10 +1008,10 @@ class MLPClassifier(BaseTransformer):
|
|
988
1008
|
|
989
1009
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
990
1010
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
991
|
-
Returns
|
1011
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
992
1012
|
"""
|
993
1013
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
994
|
-
return []
|
1014
|
+
return [output_cols_prefix]
|
995
1015
|
|
996
1016
|
classes = self._sklearn_object.classes_
|
997
1017
|
if isinstance(classes, numpy.ndarray):
|
@@ -1220,7 +1240,7 @@ class MLPClassifier(BaseTransformer):
|
|
1220
1240
|
cp.dump(self._sklearn_object, local_score_file)
|
1221
1241
|
|
1222
1242
|
# Create temp stage to run score.
|
1223
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1243
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1224
1244
|
session = dataset._session
|
1225
1245
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1226
1246
|
SqlResultValidator(
|
@@ -1234,8 +1254,9 @@ class MLPClassifier(BaseTransformer):
|
|
1234
1254
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1235
1255
|
).validate()
|
1236
1256
|
|
1237
|
-
|
1238
|
-
|
1257
|
+
# Use posixpath to construct stage paths
|
1258
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1259
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1239
1260
|
statement_params = telemetry.get_function_usage_statement_params(
|
1240
1261
|
project=_PROJECT,
|
1241
1262
|
subproject=_SUBPROJECT,
|
@@ -1261,6 +1282,7 @@ class MLPClassifier(BaseTransformer):
|
|
1261
1282
|
replace=True,
|
1262
1283
|
session=session,
|
1263
1284
|
statement_params=statement_params,
|
1285
|
+
anonymous=True
|
1264
1286
|
)
|
1265
1287
|
def score_wrapper_sproc(
|
1266
1288
|
session: Session,
|
@@ -1268,7 +1290,8 @@ class MLPClassifier(BaseTransformer):
|
|
1268
1290
|
stage_score_file_name: str,
|
1269
1291
|
input_cols: List[str],
|
1270
1292
|
label_cols: List[str],
|
1271
|
-
sample_weight_col: Optional[str]
|
1293
|
+
sample_weight_col: Optional[str],
|
1294
|
+
statement_params: Dict[str, str]
|
1272
1295
|
) -> float:
|
1273
1296
|
import cloudpickle as cp
|
1274
1297
|
import numpy as np
|
@@ -1318,14 +1341,14 @@ class MLPClassifier(BaseTransformer):
|
|
1318
1341
|
api_calls=[Session.call],
|
1319
1342
|
custom_tags=dict([("autogen", True)]),
|
1320
1343
|
)
|
1321
|
-
score =
|
1322
|
-
|
1344
|
+
score = score_wrapper_sproc(
|
1345
|
+
session,
|
1323
1346
|
query,
|
1324
1347
|
stage_score_file_name,
|
1325
1348
|
identifier.get_unescaped_names(self.input_cols),
|
1326
1349
|
identifier.get_unescaped_names(self.label_cols),
|
1327
1350
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1328
|
-
statement_params
|
1351
|
+
statement_params,
|
1329
1352
|
)
|
1330
1353
|
|
1331
1354
|
cleanup_temp_files([local_score_file_name])
|
@@ -1343,18 +1366,20 @@ class MLPClassifier(BaseTransformer):
|
|
1343
1366
|
if self._sklearn_object._estimator_type == 'classifier':
|
1344
1367
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1345
1368
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1346
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1369
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1370
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1347
1371
|
# For regressor, the type of predict is float64
|
1348
1372
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1349
1373
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1350
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1351
|
-
|
1374
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1375
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1352
1376
|
for prob_func in PROB_FUNCTIONS:
|
1353
1377
|
if hasattr(self, prob_func):
|
1354
1378
|
output_cols_prefix: str = f"{prob_func}_"
|
1355
1379
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1356
1380
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1357
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1381
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1382
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1358
1383
|
|
1359
1384
|
@property
|
1360
1385
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -348,7 +350,6 @@ class MLPRegressor(BaseTransformer):
|
|
348
350
|
sample_weight_col: Optional[str] = None,
|
349
351
|
) -> None:
|
350
352
|
super().__init__()
|
351
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
352
353
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
353
354
|
|
354
355
|
self._deps = list(deps)
|
@@ -390,6 +391,15 @@ class MLPRegressor(BaseTransformer):
|
|
390
391
|
self.set_drop_input_cols(drop_input_cols)
|
391
392
|
self.set_sample_weight_col(sample_weight_col)
|
392
393
|
|
394
|
+
def _get_rand_id(self) -> str:
|
395
|
+
"""
|
396
|
+
Generate random id to be used in sproc and stage names.
|
397
|
+
|
398
|
+
Returns:
|
399
|
+
Random id string usable in sproc, table, and stage names.
|
400
|
+
"""
|
401
|
+
return str(uuid4()).replace("-", "_").upper()
|
402
|
+
|
393
403
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
394
404
|
"""
|
395
405
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -468,7 +478,7 @@ class MLPRegressor(BaseTransformer):
|
|
468
478
|
cp.dump(self._sklearn_object, local_transform_file)
|
469
479
|
|
470
480
|
# Create temp stage to run fit.
|
471
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
481
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
472
482
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
473
483
|
SqlResultValidator(
|
474
484
|
session=session,
|
@@ -481,11 +491,12 @@ class MLPRegressor(BaseTransformer):
|
|
481
491
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
482
492
|
).validate()
|
483
493
|
|
484
|
-
|
494
|
+
# Use posixpath to construct stage paths
|
495
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
496
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
485
497
|
local_result_file_name = get_temp_file_path()
|
486
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
487
498
|
|
488
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
499
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
489
500
|
statement_params = telemetry.get_function_usage_statement_params(
|
490
501
|
project=_PROJECT,
|
491
502
|
subproject=_SUBPROJECT,
|
@@ -511,6 +522,7 @@ class MLPRegressor(BaseTransformer):
|
|
511
522
|
replace=True,
|
512
523
|
session=session,
|
513
524
|
statement_params=statement_params,
|
525
|
+
anonymous=True
|
514
526
|
)
|
515
527
|
def fit_wrapper_sproc(
|
516
528
|
session: Session,
|
@@ -519,7 +531,8 @@ class MLPRegressor(BaseTransformer):
|
|
519
531
|
stage_result_file_name: str,
|
520
532
|
input_cols: List[str],
|
521
533
|
label_cols: List[str],
|
522
|
-
sample_weight_col: Optional[str]
|
534
|
+
sample_weight_col: Optional[str],
|
535
|
+
statement_params: Dict[str, str]
|
523
536
|
) -> str:
|
524
537
|
import cloudpickle as cp
|
525
538
|
import numpy as np
|
@@ -586,15 +599,15 @@ class MLPRegressor(BaseTransformer):
|
|
586
599
|
api_calls=[Session.call],
|
587
600
|
custom_tags=dict([("autogen", True)]),
|
588
601
|
)
|
589
|
-
sproc_export_file_name =
|
590
|
-
|
602
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
603
|
+
session,
|
591
604
|
query,
|
592
605
|
stage_transform_file_name,
|
593
606
|
stage_result_file_name,
|
594
607
|
identifier.get_unescaped_names(self.input_cols),
|
595
608
|
identifier.get_unescaped_names(self.label_cols),
|
596
609
|
identifier.get_unescaped_names(self.sample_weight_col),
|
597
|
-
statement_params
|
610
|
+
statement_params,
|
598
611
|
)
|
599
612
|
|
600
613
|
if "|" in sproc_export_file_name:
|
@@ -604,7 +617,7 @@ class MLPRegressor(BaseTransformer):
|
|
604
617
|
print("\n".join(fields[1:]))
|
605
618
|
|
606
619
|
session.file.get(
|
607
|
-
|
620
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
608
621
|
local_result_file_name,
|
609
622
|
statement_params=statement_params
|
610
623
|
)
|
@@ -650,7 +663,7 @@ class MLPRegressor(BaseTransformer):
|
|
650
663
|
|
651
664
|
# Register vectorized UDF for batch inference
|
652
665
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
653
|
-
safe_id=self.
|
666
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
654
667
|
|
655
668
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
656
669
|
# will try to pickle all of self which fails.
|
@@ -742,7 +755,7 @@ class MLPRegressor(BaseTransformer):
|
|
742
755
|
return transformed_pandas_df.to_dict("records")
|
743
756
|
|
744
757
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
745
|
-
safe_id=self.
|
758
|
+
safe_id=self._get_rand_id()
|
746
759
|
)
|
747
760
|
|
748
761
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -909,11 +922,18 @@ class MLPRegressor(BaseTransformer):
|
|
909
922
|
Transformed dataset.
|
910
923
|
"""
|
911
924
|
if isinstance(dataset, DataFrame):
|
925
|
+
expected_type_inferred = "float"
|
926
|
+
# when it is classifier, infer the datatype from label columns
|
927
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
928
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
929
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
930
|
+
)
|
931
|
+
|
912
932
|
output_df = self._batch_inference(
|
913
933
|
dataset=dataset,
|
914
934
|
inference_method="predict",
|
915
935
|
expected_output_cols_list=self.output_cols,
|
916
|
-
expected_output_cols_type=
|
936
|
+
expected_output_cols_type=expected_type_inferred,
|
917
937
|
)
|
918
938
|
elif isinstance(dataset, pd.DataFrame):
|
919
939
|
output_df = self._sklearn_inference(
|
@@ -984,10 +1004,10 @@ class MLPRegressor(BaseTransformer):
|
|
984
1004
|
|
985
1005
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
986
1006
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
987
|
-
Returns
|
1007
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
988
1008
|
"""
|
989
1009
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
990
|
-
return []
|
1010
|
+
return [output_cols_prefix]
|
991
1011
|
|
992
1012
|
classes = self._sklearn_object.classes_
|
993
1013
|
if isinstance(classes, numpy.ndarray):
|
@@ -1212,7 +1232,7 @@ class MLPRegressor(BaseTransformer):
|
|
1212
1232
|
cp.dump(self._sklearn_object, local_score_file)
|
1213
1233
|
|
1214
1234
|
# Create temp stage to run score.
|
1215
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1235
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1216
1236
|
session = dataset._session
|
1217
1237
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1218
1238
|
SqlResultValidator(
|
@@ -1226,8 +1246,9 @@ class MLPRegressor(BaseTransformer):
|
|
1226
1246
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1227
1247
|
).validate()
|
1228
1248
|
|
1229
|
-
|
1230
|
-
|
1249
|
+
# Use posixpath to construct stage paths
|
1250
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1251
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1231
1252
|
statement_params = telemetry.get_function_usage_statement_params(
|
1232
1253
|
project=_PROJECT,
|
1233
1254
|
subproject=_SUBPROJECT,
|
@@ -1253,6 +1274,7 @@ class MLPRegressor(BaseTransformer):
|
|
1253
1274
|
replace=True,
|
1254
1275
|
session=session,
|
1255
1276
|
statement_params=statement_params,
|
1277
|
+
anonymous=True
|
1256
1278
|
)
|
1257
1279
|
def score_wrapper_sproc(
|
1258
1280
|
session: Session,
|
@@ -1260,7 +1282,8 @@ class MLPRegressor(BaseTransformer):
|
|
1260
1282
|
stage_score_file_name: str,
|
1261
1283
|
input_cols: List[str],
|
1262
1284
|
label_cols: List[str],
|
1263
|
-
sample_weight_col: Optional[str]
|
1285
|
+
sample_weight_col: Optional[str],
|
1286
|
+
statement_params: Dict[str, str]
|
1264
1287
|
) -> float:
|
1265
1288
|
import cloudpickle as cp
|
1266
1289
|
import numpy as np
|
@@ -1310,14 +1333,14 @@ class MLPRegressor(BaseTransformer):
|
|
1310
1333
|
api_calls=[Session.call],
|
1311
1334
|
custom_tags=dict([("autogen", True)]),
|
1312
1335
|
)
|
1313
|
-
score =
|
1314
|
-
|
1336
|
+
score = score_wrapper_sproc(
|
1337
|
+
session,
|
1315
1338
|
query,
|
1316
1339
|
stage_score_file_name,
|
1317
1340
|
identifier.get_unescaped_names(self.input_cols),
|
1318
1341
|
identifier.get_unescaped_names(self.label_cols),
|
1319
1342
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1320
|
-
statement_params
|
1343
|
+
statement_params,
|
1321
1344
|
)
|
1322
1345
|
|
1323
1346
|
cleanup_temp_files([local_score_file_name])
|
@@ -1335,18 +1358,20 @@ class MLPRegressor(BaseTransformer):
|
|
1335
1358
|
if self._sklearn_object._estimator_type == 'classifier':
|
1336
1359
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1337
1360
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1338
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1361
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1362
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1339
1363
|
# For regressor, the type of predict is float64
|
1340
1364
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1341
1365
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1342
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1343
|
-
|
1366
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1367
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1344
1368
|
for prob_func in PROB_FUNCTIONS:
|
1345
1369
|
if hasattr(self, prob_func):
|
1346
1370
|
output_cols_prefix: str = f"{prob_func}_"
|
1347
1371
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1348
1372
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1349
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1373
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1374
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1350
1375
|
|
1351
1376
|
@property
|
1352
1377
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -121,6 +121,7 @@ class OrdinalEncoder(base.BaseTransformer):
|
|
121
121
|
self.categories_: Dict[str, type_utils.LiteralNDArrayType] = {}
|
122
122
|
self._categories_list: List[type_utils.LiteralNDArrayType] = []
|
123
123
|
self._missing_indices: Dict[int, int] = {}
|
124
|
+
self._infrequent_enabled = False
|
124
125
|
self._vocab_table_name = "snowml_preprocessing_ordinal_encoder_temp_table_" + uuid.uuid4().hex
|
125
126
|
|
126
127
|
self.set_input_cols(input_cols)
|
@@ -547,6 +548,7 @@ class OrdinalEncoder(base.BaseTransformer):
|
|
547
548
|
if self._is_fitted:
|
548
549
|
encoder.categories_ = self._categories_list
|
549
550
|
encoder._missing_indices = self._missing_indices
|
551
|
+
encoder._infrequent_enabled = self._infrequent_enabled
|
550
552
|
return encoder
|
551
553
|
|
552
554
|
def _validate_keywords(self) -> None:
|