snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +16 -8
- snowflake/cortex/_classify_text.py +12 -1
- snowflake/cortex/_complete.py +82 -13
- snowflake/cortex/_embed_text_1024.py +9 -2
- snowflake/cortex/_embed_text_768.py +9 -2
- snowflake/cortex/_extract_answer.py +9 -2
- snowflake/cortex/_sentiment.py +9 -2
- snowflake/cortex/_summarize.py +9 -2
- snowflake/cortex/_translate.py +9 -2
- snowflake/ml/_internal/env_utils.py +7 -52
- snowflake/ml/_internal/utils/identifier.py +4 -2
- snowflake/ml/data/__init__.py +3 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +4 -4
- snowflake/ml/data/data_connector.py +53 -11
- snowflake/ml/data/data_ingestor.py +2 -1
- snowflake/ml/data/torch_utils.py +18 -5
- snowflake/ml/feature_store/examples/example_helper.py +2 -1
- snowflake/ml/fileset/fileset.py +18 -18
- snowflake/ml/model/_client/model/model_version_impl.py +5 -3
- snowflake/ml/model/_client/ops/model_ops.py +2 -6
- snowflake/ml/model/_client/sql/model_version.py +11 -0
- snowflake/ml/model/_model_composer/model_composer.py +8 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
- snowflake/ml/model/_model_composer/model_method/constants.py +1 -0
- snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -0
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +9 -1
- snowflake/ml/model/_model_composer/model_user_file/model_user_file.py +27 -0
- snowflake/ml/model/_packager/model_handlers/_utils.py +27 -2
- snowflake/ml/model/_packager/model_handlers/catboost.py +3 -3
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +5 -1
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -3
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +55 -20
- snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -10
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +66 -28
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -17
- snowflake/ml/model/_packager/model_handlers/xgboost.py +3 -3
- snowflake/ml/model/_packager/model_meta/model_meta.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
- snowflake/ml/model/_packager/model_task/model_task_utils.py +3 -2
- snowflake/ml/model/_signatures/pandas_handler.py +1 -1
- snowflake/ml/model/_signatures/snowpark_handler.py +8 -2
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +0 -8
- snowflake/ml/modeling/_internal/model_transformer_builder.py +0 -13
- snowflake/ml/modeling/pipeline/pipeline.py +6 -176
- snowflake/ml/modeling/xgboost/xgb_classifier.py +161 -88
- snowflake/ml/modeling/xgboost/xgb_regressor.py +160 -85
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +160 -85
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +160 -85
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +4 -4
- snowflake/ml/registry/_manager/model_manager.py +70 -33
- snowflake/ml/registry/registry.py +41 -22
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/METADATA +38 -9
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/RECORD +63 -67
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/utils/retryable_http.py +0 -39
- snowflake/ml/fileset/parquet_parser.py +0 -170
- snowflake/ml/fileset/tf_dataset.py +0 -88
- snowflake/ml/fileset/torch_datapipe.py +0 -57
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +0 -151
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_trainer.py +0 -66
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/top_level.txt +0 -0
```diff
--- a/snowflake/ml/modeling/_internal/model_trainer_builder.py
+++ b/snowflake/ml/modeling/_internal/model_trainer_builder.py
@@ -1,11 +1,9 @@
-import os
 from typing import List, Optional, Union
 
 import pandas as pd
 from sklearn import model_selection
 
 from snowflake.ml._internal.exceptions import error_codes, exceptions
-from snowflake.ml.modeling._internal.constants import IN_ML_RUNTIME_ENV_VAR
 from snowflake.ml.modeling._internal.estimator_utils import (
     get_module_name,
     is_single_node,
@@ -13,9 +11,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
 from snowflake.ml.modeling._internal.local_implementations.pandas_trainer import (
     PandasModelTrainer,
 )
-from snowflake.ml.modeling._internal.ml_runtime_implementations.ml_runtime_trainer import (
-    MLRuntimeModelTrainer,
-)
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.snowpark_implementations.distributed_hpo_trainer import (
     DistributedHPOTrainer,
@@ -107,9 +102,6 @@ class ModelTrainerBuilder:
             "autogenerated": autogenerated,
             "subproject": subproject,
         }
-        if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-            return MLRuntimeModelTrainer(**init_args)  # type: ignore[arg-type, return-value]
-
         trainer_klass = SnowparkModelTrainer
 
         assert dataset._session is not None  # Make MyPy happy
```
```diff
--- a/snowflake/ml/modeling/_internal/model_transformer_builder.py
+++ b/snowflake/ml/modeling/_internal/model_transformer_builder.py
@@ -1,16 +1,11 @@
-import os
 from typing import Optional, Union
 
 import pandas as pd
 
 from snowflake import snowpark
-from snowflake.ml.modeling._internal.constants import IN_ML_RUNTIME_ENV_VAR
 from snowflake.ml.modeling._internal.local_implementations.pandas_handlers import (
     PandasTransformHandlers,
 )
-from snowflake.ml.modeling._internal.ml_runtime_implementations.ml_runtime_handlers import (
-    MLRuntimeTransformHandlers,
-)
 from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import (
     SnowparkTransformHandlers,
 )
@@ -63,14 +58,6 @@ class ModelTransformerBuilder:
             )
 
         elif isinstance(dataset, snowpark.DataFrame):
-            if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-                return MLRuntimeTransformHandlers(
-                    dataset=dataset,
-                    estimator=estimator,
-                    class_name=class_name,
-                    subproject=subproject,
-                    autogenerated=autogenerated,
-                )
             return SnowparkTransformHandlers(
                 dataset=dataset,
                 estimator=estimator,
```
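With the ML Runtime branch removed, `ModelTransformerBuilder.build` dispatches purely on dataset type: pandas input gets a `PandasTransformHandlers`, Snowpark input a `SnowparkTransformHandlers`. A minimal sketch of the call shape under 1.7.3 — the keyword arguments mirror the `ModelTransformerBuilder.build(...)` call sites removed from `pipeline.py` below, while the estimator and toy data are illustrative placeholders, not taken from this diff:

```python
# Sketch only: the estimator and toy data are placeholders, not from this diff.
import pandas as pd
from sklearn.linear_model import LogisticRegression

from snowflake.ml.modeling._internal.model_transformer_builder import (
    ModelTransformerBuilder,
)

handler = ModelTransformerBuilder.build(
    dataset=pd.DataFrame({"F1": [0.1, 0.2, 0.3], "LABEL": [0, 1, 0]}),
    estimator=LogisticRegression(),
    class_name="LogisticRegression",
    subproject="",
    autogenerated=False,
)
# In 1.7.3 this is always a PandasTransformHandlers for pandas input and a
# SnowparkTransformHandlers for snowpark.DataFrame input; the
# MLRuntimeTransformHandlers path no longer exists.
```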
```diff
--- a/snowflake/ml/modeling/pipeline/pipeline.py
+++ b/snowflake/ml/modeling/pipeline/pipeline.py
@@ -30,7 +30,6 @@ from snowflake.snowpark._internal import utils as snowpark_utils
 
 _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "Framework"
-IN_ML_RUNTIME_ENV_VAR = "IN_SPCS_ML_RUNTIME"
 
 
 def _final_step_has(attr: str) -> Callable[..., bool]:
@@ -432,10 +431,7 @@ class Pipeline(base.BaseTransformer):
             data_sources = [data_source.DataFrameInfo(dataset.queries["queries"][-1])]
             lineage_utils.set_data_sources(self, data_sources)
 
-        if self._can_be_trained_in_ml_runtime(dataset):
-            self._fit_ml_runtime(dataset)
-
-        elif squash and isinstance(dataset, snowpark.DataFrame):
+        if squash and isinstance(dataset, snowpark.DataFrame):
             session = dataset._session
             assert session is not None
             self._fit_snowpark_dataframe_within_one_sproc(session=session, dataset=dataset)
@@ -606,25 +602,7 @@ class Pipeline(base.BaseTransformer):
         Returns:
             Output dataset.
         """
-
-        expected_output_cols = self._infer_output_cols()
-        handler = ModelTransformerBuilder.build(
-            dataset=dataset,
-            estimator=self._sklearn_object,
-            class_name="Pipeline",
-            subproject="",
-            autogenerated=False,
-        )
-        return handler.batch_inference(
-            inference_method="predict",
-            input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
-            expected_output_cols=expected_output_cols,
-            session=dataset._session,
-            dependencies=self._deps,
-        )
-
-        else:
-            return self._invoke_estimator_func("predict", dataset)
+        return self._invoke_estimator_func("predict", dataset)
 
     @metaestimators.available_if(_final_step_has("score_samples"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -642,32 +620,8 @@ class Pipeline(base.BaseTransformer):
 
         Returns:
             Output dataset.
-
-        Raises:
-            ValueError: An sklearn object has not been fit before calling this function
         """
-
-        if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-            if self._sklearn_object is None:
-                raise ValueError("Model must be fit before inference.")
-
-            expected_output_cols = self._get_output_column_names("score_samples")
-            handler = ModelTransformerBuilder.build(
-                dataset=dataset,
-                estimator=self._sklearn_object,
-                class_name="Pipeline",
-                subproject="",
-                autogenerated=False,
-            )
-            return handler.batch_inference(
-                inference_method="score_samples",
-                input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
-                expected_output_cols=expected_output_cols,
-                session=dataset._session,
-                dependencies=self._deps,
-            )
-        else:
-            return self._invoke_estimator_func("score_samples", dataset)
+        return self._invoke_estimator_func("score_samples", dataset)
 
     @metaestimators.available_if(_final_step_has("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -685,32 +639,8 @@ class Pipeline(base.BaseTransformer):
 
         Returns:
             Output dataset.
-
-        Raises:
-            ValueError: An sklearn object has not been fit before calling this function
         """
-
-        if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-            if self._sklearn_object is None:
-                raise ValueError("Model must be fit before inference.")
-            expected_output_cols = self._get_output_column_names("predict_proba")
-
-            handler = ModelTransformerBuilder.build(
-                dataset=dataset,
-                estimator=self._sklearn_object,
-                class_name="Pipeline",
-                subproject="",
-                autogenerated=False,
-            )
-            return handler.batch_inference(
-                inference_method="predict_proba",
-                input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
-                expected_output_cols=expected_output_cols,
-                session=dataset._session,
-                dependencies=self._deps,
-            )
-        else:
-            return self._invoke_estimator_func("predict_proba", dataset)
+        return self._invoke_estimator_func("predict_proba", dataset)
 
     @metaestimators.available_if(_final_step_has("predict_log_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -729,31 +659,8 @@ class Pipeline(base.BaseTransformer):
 
         Returns:
             Output dataset.
-
-        Raises:
-            ValueError: An sklearn object has not been fit before calling this function
         """
-
-        if self._sklearn_object is None:
-            raise ValueError("Model must be fit before inference.")
-
-        expected_output_cols = self._get_output_column_names("predict_log_proba")
-        handler = ModelTransformerBuilder.build(
-            dataset=dataset,
-            estimator=self._sklearn_object,
-            class_name="Pipeline",
-            subproject="",
-            autogenerated=False,
-        )
-        return handler.batch_inference(
-            inference_method="predict_log_proba",
-            input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
-            expected_output_cols=expected_output_cols,
-            session=dataset._session,
-            dependencies=self._deps,
-        )
-        else:
-            return self._invoke_estimator_func("predict_log_proba", dataset)
+        return self._invoke_estimator_func("predict_log_proba", dataset)
 
     @metaestimators.available_if(_final_step_has("score"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -769,30 +676,9 @@ class Pipeline(base.BaseTransformer):
 
         Returns:
             Output dataset.
-
-        Raises:
-            ValueError: An sklearn object has not been fit before calling this function
         """
 
-
-        if self._sklearn_object is None:
-            raise ValueError("Model must be fit before scoreing.")
-        handler = ModelTransformerBuilder.build(
-            dataset=dataset,
-            estimator=self._sklearn_object,
-            class_name="Pipeline",
-            subproject="",
-            autogenerated=False,
-        )
-        return handler.score(
-            input_cols=self._infer_input_cols(),
-            label_cols=self._get_label_cols(),
-            session=dataset._session,
-            dependencies=self._deps,
-            score_sproc_imports=[],
-        )
-        else:
-            return self._invoke_estimator_func("score", dataset)
+        return self._invoke_estimator_func("score", dataset)
 
     def _invoke_estimator_func(
         self, func_name: str, dataset: Union[snowpark.DataFrame, pd.DataFrame]
@@ -882,39 +768,6 @@ class Pipeline(base.BaseTransformer):
 
         return ct
 
-    def _fit_ml_runtime(self, dataset: snowpark.DataFrame) -> None:
-        """Train the pipeline in the ML Runtime.
-
-        Args:
-            dataset: The training Snowpark dataframe
-
-        Raises:
-            ModuleNotFoundError: The ML Runtime Client is not installed.
-        """
-        try:
-            from snowflake.ml.runtime import MLRuntimeClient
-        except ModuleNotFoundError as e:
-            # The snowflake.ml.runtime module should always be present when
-            # the env var IN_SPCS_ML_RUNTIME is present.
-            raise ModuleNotFoundError("ML Runtime Python Client is not installed.") from e
-
-        client = MLRuntimeClient()
-        ml_runtime_compatible_pipeline = self._create_unfitted_sklearn_object()
-
-        label_cols = self._get_label_cols()
-        all_df_cols = dataset.columns
-        input_cols = [col for col in all_df_cols if col not in label_cols]
-
-        trained_pipeline = client.train(
-            estimator=ml_runtime_compatible_pipeline,
-            dataset=dataset,
-            input_cols=input_cols,
-            label_cols=label_cols,
-            sample_weight_col=self.sample_weight_col,
-        )
-
-        self._sklearn_object = trained_pipeline
-
     def _get_label_cols(self) -> List[str]:
         """Util function to get the label columns from the pipeline.
         The label column is only present in the estimator
@@ -929,28 +782,6 @@ class Pipeline(base.BaseTransformer):
 
         return label_cols
 
-    def _can_be_trained_in_ml_runtime(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> bool:
-        """A utility function to determine if the pipeline cam be pushed down to the ML Runtime for training.
-        Currently, this is true if:
-            - The training dataset is a snowpark dataframe,
-            - The IN_SPCS_ML_RUNTIME environment is present and
-            - The pipeline can be converted to an sklearn pipeline.
-
-        Args:
-            dataset: The training dataset
-
-        Returns:
-            True if the dataset can be fit in the ml runtime, else false.
-
-        """
-        if not isinstance(dataset, snowpark.DataFrame):
-            return False
-
-        if not os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-            return False
-
-        return self._is_convertible_to_sklearn
-
     @staticmethod
     def _wrap_transformer_in_column_transformer(
         transformer_name: str, transformer: base.BaseTransformer
@@ -1124,7 +955,6 @@ class Pipeline(base.BaseTransformer):
 
         telemetry_data = {
             "pipeline_is_convertible_to_sklearn": self._is_convertible_to_sklearn,
-            "in_spcs_ml_runtime": bool(os.environ.get(IN_ML_RUNTIME_ENV_VAR)),
         }
         telemetry.send_custom_usage(
             project=_PROJECT,
```