snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +16 -8
- snowflake/cortex/_classify_text.py +12 -1
- snowflake/cortex/_complete.py +101 -13
- snowflake/cortex/_embed_text_1024.py +9 -2
- snowflake/cortex/_embed_text_768.py +9 -2
- snowflake/cortex/_extract_answer.py +9 -2
- snowflake/cortex/_sentiment.py +9 -2
- snowflake/cortex/_summarize.py +9 -2
- snowflake/cortex/_translate.py +9 -2
- snowflake/ml/_internal/env_utils.py +7 -52
- snowflake/ml/_internal/platform_capabilities.py +87 -0
- snowflake/ml/_internal/utils/identifier.py +4 -2
- snowflake/ml/data/__init__.py +3 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +4 -4
- snowflake/ml/data/data_connector.py +53 -11
- snowflake/ml/data/data_ingestor.py +2 -1
- snowflake/ml/data/torch_utils.py +18 -5
- snowflake/ml/dataset/dataset.py +0 -1
- snowflake/ml/feature_store/examples/example_helper.py +2 -1
- snowflake/ml/fileset/fileset.py +24 -18
- snowflake/ml/jobs/__init__.py +21 -0
- snowflake/ml/jobs/_utils/constants.py +51 -0
- snowflake/ml/jobs/_utils/payload_utils.py +352 -0
- snowflake/ml/jobs/_utils/spec_utils.py +298 -0
- snowflake/ml/jobs/_utils/types.py +39 -0
- snowflake/ml/jobs/decorators.py +91 -0
- snowflake/ml/jobs/job.py +113 -0
- snowflake/ml/jobs/manager.py +298 -0
- snowflake/ml/model/_client/model/model_version_impl.py +5 -3
- snowflake/ml/model/_client/ops/model_ops.py +13 -8
- snowflake/ml/model/_client/ops/service_ops.py +1 -11
- snowflake/ml/model/_client/sql/model_version.py +11 -0
- snowflake/ml/model/_client/sql/service.py +13 -6
- snowflake/ml/model/_model_composer/model_composer.py +8 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
- snowflake/ml/model/_model_composer/model_method/constants.py +1 -0
- snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -0
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +9 -1
- snowflake/ml/model/_model_composer/model_user_file/model_user_file.py +27 -0
- snowflake/ml/model/_packager/model_handlers/_utils.py +39 -5
- snowflake/ml/model/_packager/model_handlers/catboost.py +3 -3
- snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +6 -1
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -3
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +55 -20
- snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -10
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +66 -28
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -17
- snowflake/ml/model/_packager/model_handlers/xgboost.py +3 -3
- snowflake/ml/model/_packager/model_meta/model_meta.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
- snowflake/ml/model/_packager/model_task/model_task_utils.py +3 -2
- snowflake/ml/model/_signatures/base_handler.py +1 -2
- snowflake/ml/model/_signatures/builtins_handler.py +2 -2
- snowflake/ml/model/_signatures/numpy_handler.py +6 -7
- snowflake/ml/model/_signatures/pandas_handler.py +3 -3
- snowflake/ml/model/_signatures/pytorch_handler.py +2 -5
- snowflake/ml/model/_signatures/snowpark_handler.py +11 -5
- snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
- snowflake/ml/model/model_signature.py +17 -4
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +0 -8
- snowflake/ml/modeling/_internal/model_transformer_builder.py +0 -13
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
- snowflake/ml/modeling/cluster/birch.py +6 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
- snowflake/ml/modeling/cluster/dbscan.py +6 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
- snowflake/ml/modeling/cluster/k_means.py +6 -3
- snowflake/ml/modeling/cluster/mean_shift.py +6 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
- snowflake/ml/modeling/cluster/optics.py +6 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
- snowflake/ml/modeling/compose/column_transformer.py +6 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
- snowflake/ml/modeling/covariance/oas.py +6 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/pca.py +6 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
- snowflake/ml/modeling/impute/knn_imputer.py +6 -3
- snowflake/ml/modeling/impute/missing_indicator.py +6 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/lars.py +6 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/perceptron.py +6 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ridge.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
- snowflake/ml/modeling/manifold/isomap.py +6 -3
- snowflake/ml/modeling/manifold/mds.py +6 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
- snowflake/ml/modeling/manifold/tsne.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
- snowflake/ml/modeling/pipeline/pipeline.py +16 -178
- snowflake/ml/modeling/preprocessing/polynomial_features.py +6 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
- snowflake/ml/modeling/svm/linear_svc.py +6 -3
- snowflake/ml/modeling/svm/linear_svr.py +6 -3
- snowflake/ml/modeling/svm/nu_svc.py +6 -3
- snowflake/ml/modeling/svm/nu_svr.py +6 -3
- snowflake/ml/modeling/svm/svc.py +6 -3
- snowflake/ml/modeling/svm/svr.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +167 -91
- snowflake/ml/modeling/xgboost/xgb_regressor.py +166 -88
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +166 -88
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +166 -88
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +4 -4
- snowflake/ml/registry/_manager/model_manager.py +70 -33
- snowflake/ml/registry/registry.py +41 -22
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/METADATA +63 -19
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/RECORD +231 -226
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/utils/retryable_http.py +0 -39
- snowflake/ml/fileset/parquet_parser.py +0 -170
- snowflake/ml/fileset/tf_dataset.py +0 -88
- snowflake/ml/fileset/torch_datapipe.py +0 -57
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +0 -151
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_trainer.py +0 -66
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/top_level.txt +0 -0
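The headline addition in this release is the new snowflake/ml/jobs package (constants, payload and spec utilities, decorators, a job handle, and a manager). A minimal usage sketch, assuming the decorator is exposed as `remote` from jobs/decorators.py and that calling the decorated function returns the job handle defined in jobs/job.py; the parameter names here are assumptions, not confirmed by this diff:

# Hedged sketch of the new (preview) snowflake.ml.jobs API in 1.7.4.
# `remote`, the compute pool argument, and `stage_name` are inferred from
# the new files listed above; check the release docs for exact signatures.
from snowflake.ml import jobs

@jobs.remote("MY_COMPUTE_POOL", stage_name="payload_stage")
def train(table_name: str) -> None:
    ...  # body is packaged to a stage and run on Snowpark Container Services

job = train("MY_TRAINING_TABLE")  # assumed to return a job handle (jobs/job.py)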
snowflake/ml/modeling/pipeline/pipeline.py
CHANGED
@@ -20,7 +20,11 @@ from snowflake.ml._internal.exceptions import error_codes, exceptions
 from snowflake.ml._internal.lineage import lineage_utils
 from snowflake.ml._internal.utils import snowpark_dataframe_utils, temp_file_utils
 from snowflake.ml.data import data_source
-from snowflake.ml.model.model_signature import
+from snowflake.ml.model.model_signature import (
+    ModelSignature,
+    _infer_signature,
+    _truncate_data,
+)
 from snowflake.ml.modeling._internal.model_transformer_builder import (
     ModelTransformerBuilder,
 )
@@ -30,7 +34,8 @@ from snowflake.snowpark._internal import utils as snowpark_utils
 
 _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "Framework"
-
+
+INFER_SIGNATURE_MAX_ROWS = 100
 
 
 def _final_step_has(attr: str) -> Callable[..., bool]:
@@ -432,10 +437,7 @@ class Pipeline(base.BaseTransformer):
             data_sources = [data_source.DataFrameInfo(dataset.queries["queries"][-1])]
             lineage_utils.set_data_sources(self, data_sources)
 
-        if
-            self._fit_ml_runtime(dataset)
-
-        elif squash and isinstance(dataset, snowpark.DataFrame):
+        if squash and isinstance(dataset, snowpark.DataFrame):
            session = dataset._session
            assert session is not None
            self._fit_snowpark_dataframe_within_one_sproc(session=session, dataset=dataset)
@@ -606,25 +608,7 @@ class Pipeline(base.BaseTransformer):
         Returns:
             Output dataset.
         """
-        if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-            expected_output_cols = self._infer_output_cols()
-            handler = ModelTransformerBuilder.build(
-                dataset=dataset,
-                estimator=self._sklearn_object,
-                class_name="Pipeline",
-                subproject="",
-                autogenerated=False,
-            )
-            return handler.batch_inference(
-                inference_method="predict",
-                input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
-                expected_output_cols=expected_output_cols,
-                session=dataset._session,
-                dependencies=self._deps,
-            )
-
-        else:
-            return self._invoke_estimator_func("predict", dataset)
+        return self._invoke_estimator_func("predict", dataset)
 
     @metaestimators.available_if(_final_step_has("score_samples"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -642,32 +626,8 @@ class Pipeline(base.BaseTransformer):
 
         Returns:
             Output dataset.
-
-        Raises:
-            ValueError: An sklearn object has not been fit before calling this function
         """
-
-        if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-            if self._sklearn_object is None:
-                raise ValueError("Model must be fit before inference.")
-
-            expected_output_cols = self._get_output_column_names("score_samples")
-            handler = ModelTransformerBuilder.build(
-                dataset=dataset,
-                estimator=self._sklearn_object,
-                class_name="Pipeline",
-                subproject="",
-                autogenerated=False,
-            )
-            return handler.batch_inference(
-                inference_method="score_samples",
-                input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
-                expected_output_cols=expected_output_cols,
-                session=dataset._session,
-                dependencies=self._deps,
-            )
-        else:
-            return self._invoke_estimator_func("score_samples", dataset)
+        return self._invoke_estimator_func("score_samples", dataset)
 
     @metaestimators.available_if(_final_step_has("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -685,32 +645,8 @@ class Pipeline(base.BaseTransformer):
 
         Returns:
             Output dataset.
-
-        Raises:
-            ValueError: An sklearn object has not been fit before calling this function
         """
-
-        if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-            if self._sklearn_object is None:
-                raise ValueError("Model must be fit before inference.")
-            expected_output_cols = self._get_output_column_names("predict_proba")
-
-            handler = ModelTransformerBuilder.build(
-                dataset=dataset,
-                estimator=self._sklearn_object,
-                class_name="Pipeline",
-                subproject="",
-                autogenerated=False,
-            )
-            return handler.batch_inference(
-                inference_method="predict_proba",
-                input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
-                expected_output_cols=expected_output_cols,
-                session=dataset._session,
-                dependencies=self._deps,
-            )
-        else:
-            return self._invoke_estimator_func("predict_proba", dataset)
+        return self._invoke_estimator_func("predict_proba", dataset)
 
     @metaestimators.available_if(_final_step_has("predict_log_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -729,31 +665,8 @@ class Pipeline(base.BaseTransformer):
 
         Returns:
             Output dataset.
-
-        Raises:
-            ValueError: An sklearn object has not been fit before calling this function
         """
-        if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-            if self._sklearn_object is None:
-                raise ValueError("Model must be fit before inference.")
-
-            expected_output_cols = self._get_output_column_names("predict_log_proba")
-            handler = ModelTransformerBuilder.build(
-                dataset=dataset,
-                estimator=self._sklearn_object,
-                class_name="Pipeline",
-                subproject="",
-                autogenerated=False,
-            )
-            return handler.batch_inference(
-                inference_method="predict_log_proba",
-                input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
-                expected_output_cols=expected_output_cols,
-                session=dataset._session,
-                dependencies=self._deps,
-            )
-        else:
-            return self._invoke_estimator_func("predict_log_proba", dataset)
+        return self._invoke_estimator_func("predict_log_proba", dataset)
 
     @metaestimators.available_if(_final_step_has("score"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -769,30 +682,9 @@ class Pipeline(base.BaseTransformer):
 
         Returns:
             Output dataset.
-
-        Raises:
-            ValueError: An sklearn object has not been fit before calling this function
         """
 
-        if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-            if self._sklearn_object is None:
-                raise ValueError("Model must be fit before scoreing.")
-            handler = ModelTransformerBuilder.build(
-                dataset=dataset,
-                estimator=self._sklearn_object,
-                class_name="Pipeline",
-                subproject="",
-                autogenerated=False,
-            )
-            return handler.score(
-                input_cols=self._infer_input_cols(),
-                label_cols=self._get_label_cols(),
-                session=dataset._session,
-                dependencies=self._deps,
-                score_sproc_imports=[],
-            )
-        else:
-            return self._invoke_estimator_func("score", dataset)
+        return self._invoke_estimator_func("score", dataset)
 
     def _invoke_estimator_func(
         self, func_name: str, dataset: Union[snowpark.DataFrame, pd.DataFrame]
@@ -882,39 +774,6 @@ class Pipeline(base.BaseTransformer):
 
         return ct
 
-    def _fit_ml_runtime(self, dataset: snowpark.DataFrame) -> None:
-        """Train the pipeline in the ML Runtime.
-
-        Args:
-            dataset: The training Snowpark dataframe
-
-        Raises:
-            ModuleNotFoundError: The ML Runtime Client is not installed.
-        """
-        try:
-            from snowflake.ml.runtime import MLRuntimeClient
-        except ModuleNotFoundError as e:
-            # The snowflake.ml.runtime module should always be present when
-            # the env var IN_SPCS_ML_RUNTIME is present.
-            raise ModuleNotFoundError("ML Runtime Python Client is not installed.") from e
-
-        client = MLRuntimeClient()
-        ml_runtime_compatible_pipeline = self._create_unfitted_sklearn_object()
-
-        label_cols = self._get_label_cols()
-        all_df_cols = dataset.columns
-        input_cols = [col for col in all_df_cols if col not in label_cols]
-
-        trained_pipeline = client.train(
-            estimator=ml_runtime_compatible_pipeline,
-            dataset=dataset,
-            input_cols=input_cols,
-            label_cols=label_cols,
-            sample_weight_col=self.sample_weight_col,
-        )
-
-        self._sklearn_object = trained_pipeline
-
     def _get_label_cols(self) -> List[str]:
         """Util function to get the label columns from the pipeline.
         The label column is only present in the estimator
@@ -929,28 +788,6 @@ class Pipeline(base.BaseTransformer):
 
         return label_cols
 
-    def _can_be_trained_in_ml_runtime(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> bool:
-        """A utility function to determine if the pipeline cam be pushed down to the ML Runtime for training.
-        Currently, this is true if:
-        - The training dataset is a snowpark dataframe,
-        - The IN_SPCS_ML_RUNTIME environment is present and
-        - The pipeline can be converted to an sklearn pipeline.
-
-        Args:
-            dataset: The training dataset
-
-        Returns:
-            True if the dataset can be fit in the ml runtime, else false.
-
-        """
-        if not isinstance(dataset, snowpark.DataFrame):
-            return False
-
-        if not os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-            return False
-
-        return self._is_convertible_to_sklearn
-
     @staticmethod
     def _wrap_transformer_in_column_transformer(
         transformer_name: str, transformer: base.BaseTransformer
@@ -1054,7 +891,9 @@ class Pipeline(base.BaseTransformer):
         self._model_signature_dict = dict()
 
         input_columns = self._get_sanitized_list_of_columns(dataset.columns)
-        inputs_signature = _infer_signature(
+        inputs_signature = _infer_signature(
+            _truncate_data(dataset[input_columns], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True
+        )
 
         estimator_step = self._get_estimator()
         if estimator_step:
@@ -1124,7 +963,6 @@ class Pipeline(base.BaseTransformer):
 
         telemetry_data = {
             "pipeline_is_convertible_to_sklearn": self._is_convertible_to_sklearn,
-            "in_spcs_ml_runtime": bool(os.environ.get(IN_ML_RUNTIME_ENV_VAR)),
         }
         telemetry.send_custom_usage(
             project=_PROJECT,
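Every remaining hunk in this diff follows the pattern introduced above: each `_infer_signature(...)` call site is wrapped in `_truncate_data(..., INFER_SIGNATURE_MAX_ROWS)`, so model signatures are inferred from at most 100 rows instead of the full dataset. The body of `_truncate_data` is not shown in this diff; the following is a hypothetical stand-in that only illustrates the intent, a row cap applied before inference:

# Hypothetical sketch; the real _truncate_data lives in
# snowflake.ml.model.model_signature and may differ in detail.
from typing import Union

import pandas as pd
from snowflake import snowpark

def truncate_data(
    data: Union[snowpark.DataFrame, pd.DataFrame], max_rows: int = 100
) -> Union[snowpark.DataFrame, pd.DataFrame]:
    if isinstance(data, snowpark.DataFrame):
        # Snowpark frames are lazy, so limit() keeps the cap server-side
        # and signature inference no longer scans the whole table.
        return data.limit(max_rows)
    return data.head(max_rows)

Capping is cheap insurance here: a signature records column names and types, which the first rows already determine.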
snowflake/ml/modeling/preprocessing/polynomial_features.py
CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.preprocessing".replace("
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class PolynomialFeatures(BaseTransformer):
     r"""Generate polynomial and interaction features
     For more details on this class, see [sklearn.preprocessing.PolynomialFeatures]
@@ -429,7 +432,7 @@ class PolynomialFeatures(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1086,7 +1089,7 @@ class PolynomialFeatures(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1094,7 +1097,7 @@ class PolynomialFeatures(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/semi_supervised/label_propagation.py
CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.semi_supervised".replace
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class LabelPropagation(BaseTransformer):
     r"""Label Propagation classifier
     For more details on this class, see [sklearn.semi_supervised.LabelPropagation]
@@ -433,7 +436,7 @@ class LabelPropagation(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1094,7 +1097,7 @@ class LabelPropagation(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1102,7 +1105,7 @@ class LabelPropagation(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/semi_supervised/label_spreading.py
CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.semi_supervised".replace
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class LabelSpreading(BaseTransformer):
     r"""LabelSpreading model for semi-supervised learning
     For more details on this class, see [sklearn.semi_supervised.LabelSpreading]
@@ -442,7 +445,7 @@ class LabelSpreading(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1103,7 +1106,7 @@ class LabelSpreading(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1111,7 +1114,7 @@ class LabelSpreading(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/svm/linear_svc.py
CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class LinearSVC(BaseTransformer):
     r"""Linear Support Vector Classification
     For more details on this class, see [sklearn.svm.LinearSVC]
@@ -507,7 +510,7 @@ class LinearSVC(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1166,7 +1169,7 @@ class LinearSVC(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1174,7 +1177,7 @@ class LinearSVC(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/svm/linear_svr.py
CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class LinearSVR(BaseTransformer):
     r"""Linear Support Vector Regression
     For more details on this class, see [sklearn.svm.LinearSVR]
@@ -476,7 +479,7 @@ class LinearSVR(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1133,7 +1136,7 @@ class LinearSVR(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1141,7 +1144,7 @@ class LinearSVR(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/svm/nu_svc.py
CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class NuSVC(BaseTransformer):
     r"""Nu-Support Vector Classification
     For more details on this class, see [sklearn.svm.NuSVC]
@@ -506,7 +509,7 @@ class NuSVC(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1169,7 +1172,7 @@ class NuSVC(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1177,7 +1180,7 @@ class NuSVC(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/svm/nu_svr.py
CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class NuSVR(BaseTransformer):
     r"""Nu Support Vector Regression
     For more details on this class, see [sklearn.svm.NuSVR]
@@ -467,7 +470,7 @@ class NuSVR(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
             # We can only infer the output types from the input types if the following two statemetns are true:
             # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
             # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1124,7 +1127,7 @@ class NuSVR(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1132,7 +1135,7 @@ class NuSVR(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
snowflake/ml/modeling/svm/svc.py
CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class SVC(BaseTransformer):
     r"""C-Support Vector Classification
     For more details on this class, see [sklearn.svm.SVC]
@@ -511,7 +514,7 @@ class SVC(BaseTransformer):
         elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
             expected_dtype = "array"
         else:
-            output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+            output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
            # We can only infer the output types from the input types if the following two statemetns are true:
            # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
            # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1174,7 +1177,7 @@ class SVC(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1182,7 +1185,7 @@ class SVC(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
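The svc.py hunks above close out the same five-hunk pattern applied to every autogenerated estimator wrapper in the file list: import `_truncate_data`, define `INFER_SIGNATURE_MAX_ROWS = 100`, and cap the three `_infer_signature` call sites. A quick pandas check of why a 100-row sample is enough for signature inference; object-typed columns with late-appearing values are the usual caveat of any sampling-based inference:

# dtypes are a property of the schema, not of the row count, so a small
# head() sample yields the same signature as the full frame.
import numpy as np
import pandas as pd

df = pd.DataFrame({
    "x": np.zeros(1_000_000, dtype=np.float64),
    "y": np.zeros(1_000_000, dtype=np.int64),
})
sample = df.head(100)  # analogous to _truncate_data(df, INFER_SIGNATURE_MAX_ROWS)
assert list(sample.dtypes) == list(df.dtypes)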