snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +35 -40
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -8
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +27 -21
- snowflake/ml/model/_model_meta.py +33 -19
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +28 -15
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
- snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
- snowflake/ml/modeling/cluster/birch.py +79 -43
- snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
- snowflake/ml/modeling/cluster/dbscan.py +79 -43
- snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
- snowflake/ml/modeling/cluster/k_means.py +79 -43
- snowflake/ml/modeling/cluster/mean_shift.py +79 -43
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
- snowflake/ml/modeling/cluster/optics.py +79 -43
- snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
- snowflake/ml/modeling/compose/column_transformer.py +79 -43
- snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
- snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
- snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
- snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
- snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
- snowflake/ml/modeling/covariance/oas.py +79 -43
- snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
- snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
- snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
- snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
- snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/pca.py +79 -43
- snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
- snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
- snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
- snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
- snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
- snowflake/ml/modeling/impute/knn_imputer.py +79 -43
- snowflake/ml/modeling/impute/missing_indicator.py +79 -43
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/lars.py +79 -43
- snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
- snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/perceptron.py +79 -43
- snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ridge.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
- snowflake/ml/modeling/manifold/isomap.py +79 -43
- snowflake/ml/modeling/manifold/mds.py +79 -43
- snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
- snowflake/ml/modeling/manifold/tsne.py +79 -43
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
- snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
- snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
- snowflake/ml/modeling/svm/linear_svc.py +79 -43
- snowflake/ml/modeling/svm/linear_svr.py +79 -43
- snowflake/ml/modeling/svm/nu_svc.py +79 -43
- snowflake/ml/modeling/svm/nu_svr.py +79 -43
- snowflake/ml/modeling/svm/svc.py +79 -43
- snowflake/ml/modeling/svm/svr.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
- snowflake/ml/registry/model_registry.py +123 -121
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -302,7 +304,6 @@ class TSNE(BaseTransformer):
|
|
302
304
|
sample_weight_col: Optional[str] = None,
|
303
305
|
) -> None:
|
304
306
|
super().__init__()
|
305
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
306
307
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
307
308
|
|
308
309
|
self._deps = list(deps)
|
@@ -337,6 +338,15 @@ class TSNE(BaseTransformer):
|
|
337
338
|
self.set_drop_input_cols(drop_input_cols)
|
338
339
|
self.set_sample_weight_col(sample_weight_col)
|
339
340
|
|
341
|
+
def _get_rand_id(self) -> str:
|
342
|
+
"""
|
343
|
+
Generate random id to be used in sproc and stage names.
|
344
|
+
|
345
|
+
Returns:
|
346
|
+
Random id string usable in sproc, table, and stage names.
|
347
|
+
"""
|
348
|
+
return str(uuid4()).replace("-", "_").upper()
|
349
|
+
|
340
350
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
341
351
|
"""
|
342
352
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -415,7 +425,7 @@ class TSNE(BaseTransformer):
|
|
415
425
|
cp.dump(self._sklearn_object, local_transform_file)
|
416
426
|
|
417
427
|
# Create temp stage to run fit.
|
418
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
|
428
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
419
429
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
420
430
|
SqlResultValidator(
|
421
431
|
session=session,
|
@@ -428,11 +438,12 @@ class TSNE(BaseTransformer):
|
|
428
438
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
429
439
|
).validate()
|
430
440
|
|
431
|
-
|
441
|
+
# Use posixpath to construct stage paths
|
442
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
443
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
432
444
|
local_result_file_name = get_temp_file_path()
|
433
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
434
445
|
|
435
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
|
446
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
436
447
|
statement_params = telemetry.get_function_usage_statement_params(
|
437
448
|
project=_PROJECT,
|
438
449
|
subproject=_SUBPROJECT,
|
@@ -458,6 +469,7 @@ class TSNE(BaseTransformer):
|
|
458
469
|
replace=True,
|
459
470
|
session=session,
|
460
471
|
statement_params=statement_params,
|
472
|
+
anonymous=True
|
461
473
|
)
|
462
474
|
def fit_wrapper_sproc(
|
463
475
|
session: Session,
|
@@ -466,7 +478,8 @@ class TSNE(BaseTransformer):
|
|
466
478
|
stage_result_file_name: str,
|
467
479
|
input_cols: List[str],
|
468
480
|
label_cols: List[str],
|
469
|
-
sample_weight_col: Optional[str]
|
481
|
+
sample_weight_col: Optional[str],
|
482
|
+
statement_params: Dict[str, str]
|
470
483
|
) -> str:
|
471
484
|
import cloudpickle as cp
|
472
485
|
import numpy as np
|
@@ -533,15 +546,15 @@ class TSNE(BaseTransformer):
|
|
533
546
|
api_calls=[Session.call],
|
534
547
|
custom_tags=dict([("autogen", True)]),
|
535
548
|
)
|
536
|
-
sproc_export_file_name =
|
537
|
-
|
549
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
550
|
+
session,
|
538
551
|
query,
|
539
552
|
stage_transform_file_name,
|
540
553
|
stage_result_file_name,
|
541
554
|
identifier.get_unescaped_names(self.input_cols),
|
542
555
|
identifier.get_unescaped_names(self.label_cols),
|
543
556
|
identifier.get_unescaped_names(self.sample_weight_col),
|
544
|
-
statement_params
|
557
|
+
statement_params,
|
545
558
|
)
|
546
559
|
|
547
560
|
if "|" in sproc_export_file_name:
|
@@ -551,7 +564,7 @@ class TSNE(BaseTransformer):
|
|
551
564
|
print("\n".join(fields[1:]))
|
552
565
|
|
553
566
|
session.file.get(
|
554
|
-
|
567
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
555
568
|
local_result_file_name,
|
556
569
|
statement_params=statement_params
|
557
570
|
)
|
@@ -597,7 +610,7 @@ class TSNE(BaseTransformer):
|
|
597
610
|
|
598
611
|
# Register vectorized UDF for batch inference
|
599
612
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
600
|
-
safe_id=self.id, method=inference_method)
|
613
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
601
614
|
|
602
615
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
603
616
|
# will try to pickle all of self which fails.
|
@@ -689,7 +702,7 @@ class TSNE(BaseTransformer):
|
|
689
702
|
return transformed_pandas_df.to_dict("records")
|
690
703
|
|
691
704
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
692
|
-
safe_id=self.id
|
705
|
+
safe_id=self._get_rand_id()
|
693
706
|
)
|
694
707
|
|
695
708
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -745,26 +758,37 @@ class TSNE(BaseTransformer):
|
|
745
758
|
# input cols need to match unquoted / quoted
|
746
759
|
input_cols = self.input_cols
|
747
760
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
761
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
748
762
|
|
749
763
|
estimator = self._sklearn_object
|
750
764
|
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
765
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
766
|
+
missing_features = []
|
767
|
+
features_in_dataset = set(dataset.columns)
|
768
|
+
columns_to_select = []
|
769
|
+
for i, f in enumerate(features_required_by_estimator):
|
770
|
+
if (
|
771
|
+
i >= len(input_cols)
|
772
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
773
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
774
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
775
|
+
):
|
776
|
+
missing_features.append(f)
|
777
|
+
elif input_cols[i] in features_in_dataset:
|
778
|
+
columns_to_select.append(input_cols[i])
|
779
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
780
|
+
columns_to_select.append(unquoted_input_cols[i])
|
781
|
+
else:
|
782
|
+
columns_to_select.append(quoted_input_cols[i])
|
783
|
+
|
784
|
+
if len(missing_features) > 0:
|
785
|
+
raise ValueError(
|
786
|
+
"The feature names should match with those that were passed during fit.\n"
|
787
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
788
|
+
f"Features in the input dataframe : {input_cols}\n"
|
789
|
+
)
|
790
|
+
input_df = dataset[columns_to_select]
|
791
|
+
input_df.columns = features_required_by_estimator
|
768
792
|
|
769
793
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
770
794
|
input_df
|
@@ -843,11 +867,18 @@ class TSNE(BaseTransformer):
|
|
843
867
|
Transformed dataset.
|
844
868
|
"""
|
845
869
|
if isinstance(dataset, DataFrame):
|
870
|
+
expected_type_inferred = ""
|
871
|
+
# when it is classifier, infer the datatype from label columns
|
872
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
873
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
874
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
875
|
+
)
|
876
|
+
|
846
877
|
output_df = self._batch_inference(
|
847
878
|
dataset=dataset,
|
848
879
|
inference_method="predict",
|
849
880
|
expected_output_cols_list=self.output_cols,
|
850
|
-
expected_output_cols_type=
|
881
|
+
expected_output_cols_type=expected_type_inferred,
|
851
882
|
)
|
852
883
|
elif isinstance(dataset, pd.DataFrame):
|
853
884
|
output_df = self._sklearn_inference(
|
@@ -918,10 +949,10 @@ class TSNE(BaseTransformer):
|
|
918
949
|
|
919
950
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
920
951
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
921
|
-
Returns
|
952
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
922
953
|
"""
|
923
954
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
924
|
-
return []
|
955
|
+
return [output_cols_prefix]
|
925
956
|
|
926
957
|
classes = self._sklearn_object.classes_
|
927
958
|
if isinstance(classes, numpy.ndarray):
|
@@ -1146,7 +1177,7 @@ class TSNE(BaseTransformer):
|
|
1146
1177
|
cp.dump(self._sklearn_object, local_score_file)
|
1147
1178
|
|
1148
1179
|
# Create temp stage to run score.
|
1149
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
|
1180
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1150
1181
|
session = dataset._session
|
1151
1182
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1152
1183
|
SqlResultValidator(
|
@@ -1160,8 +1191,9 @@ class TSNE(BaseTransformer):
|
|
1160
1191
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1161
1192
|
).validate()
|
1162
1193
|
|
1163
|
-
|
1164
|
-
|
1194
|
+
# Use posixpath to construct stage paths
|
1195
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1196
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1165
1197
|
statement_params = telemetry.get_function_usage_statement_params(
|
1166
1198
|
project=_PROJECT,
|
1167
1199
|
subproject=_SUBPROJECT,
|
@@ -1187,6 +1219,7 @@ class TSNE(BaseTransformer):
|
|
1187
1219
|
replace=True,
|
1188
1220
|
session=session,
|
1189
1221
|
statement_params=statement_params,
|
1222
|
+
anonymous=True
|
1190
1223
|
)
|
1191
1224
|
def score_wrapper_sproc(
|
1192
1225
|
session: Session,
|
@@ -1194,7 +1227,8 @@ class TSNE(BaseTransformer):
|
|
1194
1227
|
stage_score_file_name: str,
|
1195
1228
|
input_cols: List[str],
|
1196
1229
|
label_cols: List[str],
|
1197
|
-
sample_weight_col: Optional[str]
|
1230
|
+
sample_weight_col: Optional[str],
|
1231
|
+
statement_params: Dict[str, str]
|
1198
1232
|
) -> float:
|
1199
1233
|
import cloudpickle as cp
|
1200
1234
|
import numpy as np
|
@@ -1244,14 +1278,14 @@ class TSNE(BaseTransformer):
|
|
1244
1278
|
api_calls=[Session.call],
|
1245
1279
|
custom_tags=dict([("autogen", True)]),
|
1246
1280
|
)
|
1247
|
-
score =
|
1248
|
-
|
1281
|
+
score = score_wrapper_sproc(
|
1282
|
+
session,
|
1249
1283
|
query,
|
1250
1284
|
stage_score_file_name,
|
1251
1285
|
identifier.get_unescaped_names(self.input_cols),
|
1252
1286
|
identifier.get_unescaped_names(self.label_cols),
|
1253
1287
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1254
|
-
statement_params
|
1288
|
+
statement_params,
|
1255
1289
|
)
|
1256
1290
|
|
1257
1291
|
cleanup_temp_files([local_score_file_name])
|
@@ -1269,18 +1303,20 @@ class TSNE(BaseTransformer):
|
|
1269
1303
|
if self._sklearn_object._estimator_type == 'classifier':
|
1270
1304
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1271
1305
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1272
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1306
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1307
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1273
1308
|
# For regressor, the type of predict is float64
|
1274
1309
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1275
1310
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1276
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1277
|
-
|
1311
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1312
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1278
1313
|
for prob_func in PROB_FUNCTIONS:
|
1279
1314
|
if hasattr(self, prob_func):
|
1280
1315
|
output_cols_prefix: str = f"{prob_func}_"
|
1281
1316
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1282
1317
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1283
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1318
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1319
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1284
1320
|
|
1285
1321
|
@property
|
1286
1322
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -54,7 +54,12 @@ def accuracy_score(
|
|
54
54
|
metrics_utils.check_label_columns(y_true_col_names, y_pred_col_names)
|
55
55
|
|
56
56
|
if isinstance(y_true_col_names, str) or (len(y_true_col_names) == 1):
|
57
|
-
|
57
|
+
y_true, y_pred = (
|
58
|
+
(y_true_col_names, y_pred_col_names)
|
59
|
+
if isinstance(y_true_col_names, str)
|
60
|
+
else (y_true_col_names[0], y_pred_col_names[0])
|
61
|
+
)
|
62
|
+
score_column = F.iff(df[y_true] == df[y_pred], 1, 0) # type: ignore[arg-type]
|
58
63
|
# multilabel
|
59
64
|
else:
|
60
65
|
expr = " and ".join([f"({y_true_col_names[i]} = {y_pred_col_names[i]})" for i in range(len(y_true_col_names))])
|