snowflake_ml_python-1.7.2-py3-none-any.whl → snowflake_ml_python-1.7.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69):
  1. snowflake/cortex/__init__.py +16 -8
  2. snowflake/cortex/_classify_text.py +12 -1
  3. snowflake/cortex/_complete.py +82 -13
  4. snowflake/cortex/_embed_text_1024.py +9 -2
  5. snowflake/cortex/_embed_text_768.py +9 -2
  6. snowflake/cortex/_extract_answer.py +9 -2
  7. snowflake/cortex/_sentiment.py +9 -2
  8. snowflake/cortex/_summarize.py +9 -2
  9. snowflake/cortex/_translate.py +9 -2
  10. snowflake/ml/_internal/env_utils.py +7 -52
  11. snowflake/ml/_internal/utils/identifier.py +4 -2
  12. snowflake/ml/data/__init__.py +3 -0
  13. snowflake/ml/data/_internal/arrow_ingestor.py +4 -4
  14. snowflake/ml/data/data_connector.py +53 -11
  15. snowflake/ml/data/data_ingestor.py +2 -1
  16. snowflake/ml/data/torch_utils.py +18 -5
  17. snowflake/ml/feature_store/examples/example_helper.py +2 -1
  18. snowflake/ml/fileset/fileset.py +18 -18
  19. snowflake/ml/model/_client/model/model_version_impl.py +5 -3
  20. snowflake/ml/model/_client/ops/model_ops.py +2 -6
  21. snowflake/ml/model/_client/sql/model_version.py +11 -0
  22. snowflake/ml/model/_model_composer/model_composer.py +8 -3
  23. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -1
  24. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
  25. snowflake/ml/model/_model_composer/model_method/constants.py +1 -0
  26. snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -0
  27. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +1 -1
  28. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +1 -1
  29. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
  30. snowflake/ml/model/_model_composer/model_method/model_method.py +9 -1
  31. snowflake/ml/model/_model_composer/model_user_file/model_user_file.py +27 -0
  32. snowflake/ml/model/_packager/model_handlers/_utils.py +27 -2
  33. snowflake/ml/model/_packager/model_handlers/catboost.py +3 -3
  34. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +5 -1
  35. snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -3
  36. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +55 -20
  37. snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -10
  38. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +66 -28
  39. snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -17
  40. snowflake/ml/model/_packager/model_handlers/xgboost.py +3 -3
  41. snowflake/ml/model/_packager/model_meta/model_meta.py +3 -0
  42. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
  43. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
  44. snowflake/ml/model/_packager/model_task/model_task_utils.py +3 -2
  45. snowflake/ml/model/_signatures/pandas_handler.py +1 -1
  46. snowflake/ml/model/_signatures/snowpark_handler.py +8 -2
  47. snowflake/ml/model/type_hints.py +1 -0
  48. snowflake/ml/modeling/_internal/model_trainer_builder.py +0 -8
  49. snowflake/ml/modeling/_internal/model_transformer_builder.py +0 -13
  50. snowflake/ml/modeling/pipeline/pipeline.py +6 -176
  51. snowflake/ml/modeling/xgboost/xgb_classifier.py +161 -88
  52. snowflake/ml/modeling/xgboost/xgb_regressor.py +160 -85
  53. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +160 -85
  54. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +160 -85
  55. snowflake/ml/monitoring/_client/model_monitor_sql_client.py +4 -4
  56. snowflake/ml/registry/_manager/model_manager.py +70 -33
  57. snowflake/ml/registry/registry.py +41 -22
  58. snowflake/ml/version.py +1 -1
  59. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/METADATA +38 -9
  60. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/RECORD +63 -67
  61. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/WHEEL +1 -1
  62. snowflake/ml/_internal/utils/retryable_http.py +0 -39
  63. snowflake/ml/fileset/parquet_parser.py +0 -170
  64. snowflake/ml/fileset/tf_dataset.py +0 -88
  65. snowflake/ml/fileset/torch_datapipe.py +0 -57
  66. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +0 -151
  67. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_trainer.py +0 -66
  68. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/LICENSE.txt +0 -0
  69. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,9 @@
1
- import os
2
1
  from typing import List, Optional, Union
3
2
 
4
3
  import pandas as pd
5
4
  from sklearn import model_selection
6
5
 
7
6
  from snowflake.ml._internal.exceptions import error_codes, exceptions
8
- from snowflake.ml.modeling._internal.constants import IN_ML_RUNTIME_ENV_VAR
9
7
  from snowflake.ml.modeling._internal.estimator_utils import (
10
8
  get_module_name,
11
9
  is_single_node,
@@ -13,9 +11,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
13
11
  from snowflake.ml.modeling._internal.local_implementations.pandas_trainer import (
14
12
  PandasModelTrainer,
15
13
  )
16
- from snowflake.ml.modeling._internal.ml_runtime_implementations.ml_runtime_trainer import (
17
- MLRuntimeModelTrainer,
18
- )
19
14
  from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
20
15
  from snowflake.ml.modeling._internal.snowpark_implementations.distributed_hpo_trainer import (
21
16
  DistributedHPOTrainer,
@@ -107,9 +102,6 @@ class ModelTrainerBuilder:
107
102
  "autogenerated": autogenerated,
108
103
  "subproject": subproject,
109
104
  }
110
- if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
111
- return MLRuntimeModelTrainer(**init_args) # type: ignore[arg-type, return-value]
112
-
113
105
  trainer_klass = SnowparkModelTrainer
114
106
 
115
107
  assert dataset._session is not None # Make MyPy happy
@@ -1,16 +1,11 @@
1
- import os
2
1
  from typing import Optional, Union
3
2
 
4
3
  import pandas as pd
5
4
 
6
5
  from snowflake import snowpark
7
- from snowflake.ml.modeling._internal.constants import IN_ML_RUNTIME_ENV_VAR
8
6
  from snowflake.ml.modeling._internal.local_implementations.pandas_handlers import (
9
7
  PandasTransformHandlers,
10
8
  )
11
- from snowflake.ml.modeling._internal.ml_runtime_implementations.ml_runtime_handlers import (
12
- MLRuntimeTransformHandlers,
13
- )
14
9
  from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import (
15
10
  SnowparkTransformHandlers,
16
11
  )
@@ -63,14 +58,6 @@ class ModelTransformerBuilder:
63
58
  )
64
59
 
65
60
  elif isinstance(dataset, snowpark.DataFrame):
66
- if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
67
- return MLRuntimeTransformHandlers(
68
- dataset=dataset,
69
- estimator=estimator,
70
- class_name=class_name,
71
- subproject=subproject,
72
- autogenerated=autogenerated,
73
- )
74
61
  return SnowparkTransformHandlers(
75
62
  dataset=dataset,
76
63
  estimator=estimator,
@@ -30,7 +30,6 @@ from snowflake.snowpark._internal import utils as snowpark_utils
30
30
 
31
31
  _PROJECT = "ModelDevelopment"
32
32
  _SUBPROJECT = "Framework"
33
- IN_ML_RUNTIME_ENV_VAR = "IN_SPCS_ML_RUNTIME"
34
33
 
35
34
 
36
35
  def _final_step_has(attr: str) -> Callable[..., bool]:
@@ -432,10 +431,7 @@ class Pipeline(base.BaseTransformer):
432
431
  data_sources = [data_source.DataFrameInfo(dataset.queries["queries"][-1])]
433
432
  lineage_utils.set_data_sources(self, data_sources)
434
433
 
435
- if self._can_be_trained_in_ml_runtime(dataset):
436
- self._fit_ml_runtime(dataset)
437
-
438
- elif squash and isinstance(dataset, snowpark.DataFrame):
434
+ if squash and isinstance(dataset, snowpark.DataFrame):
439
435
  session = dataset._session
440
436
  assert session is not None
441
437
  self._fit_snowpark_dataframe_within_one_sproc(session=session, dataset=dataset)
@@ -606,25 +602,7 @@ class Pipeline(base.BaseTransformer):
606
602
  Returns:
607
603
  Output dataset.
608
604
  """
609
- if os.environ.get(IN_ML_RUNTIME_ENV_VAR) and self._sklearn_object is not None:
610
- expected_output_cols = self._infer_output_cols()
611
- handler = ModelTransformerBuilder.build(
612
- dataset=dataset,
613
- estimator=self._sklearn_object,
614
- class_name="Pipeline",
615
- subproject="",
616
- autogenerated=False,
617
- )
618
- return handler.batch_inference(
619
- inference_method="predict",
620
- input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
621
- expected_output_cols=expected_output_cols,
622
- session=dataset._session,
623
- dependencies=self._deps,
624
- )
625
-
626
- else:
627
- return self._invoke_estimator_func("predict", dataset)
605
+ return self._invoke_estimator_func("predict", dataset)
628
606
 
629
607
  @metaestimators.available_if(_final_step_has("score_samples")) # type: ignore[misc]
630
608
  @telemetry.send_api_usage_telemetry(
@@ -642,32 +620,8 @@ class Pipeline(base.BaseTransformer):
642
620
 
643
621
  Returns:
644
622
  Output dataset.
645
-
646
- Raises:
647
- ValueError: An sklearn object has not been fit before calling this function
648
623
  """
649
-
650
- if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
651
- if self._sklearn_object is None:
652
- raise ValueError("Model must be fit before inference.")
653
-
654
- expected_output_cols = self._get_output_column_names("score_samples")
655
- handler = ModelTransformerBuilder.build(
656
- dataset=dataset,
657
- estimator=self._sklearn_object,
658
- class_name="Pipeline",
659
- subproject="",
660
- autogenerated=False,
661
- )
662
- return handler.batch_inference(
663
- inference_method="score_samples",
664
- input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
665
- expected_output_cols=expected_output_cols,
666
- session=dataset._session,
667
- dependencies=self._deps,
668
- )
669
- else:
670
- return self._invoke_estimator_func("score_samples", dataset)
624
+ return self._invoke_estimator_func("score_samples", dataset)
671
625
 
672
626
  @metaestimators.available_if(_final_step_has("predict_proba")) # type: ignore[misc]
673
627
  @telemetry.send_api_usage_telemetry(
@@ -685,32 +639,8 @@ class Pipeline(base.BaseTransformer):
685
639
 
686
640
  Returns:
687
641
  Output dataset.
688
-
689
- Raises:
690
- ValueError: An sklearn object has not been fit before calling this function
691
642
  """
692
-
693
- if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
694
- if self._sklearn_object is None:
695
- raise ValueError("Model must be fit before inference.")
696
- expected_output_cols = self._get_output_column_names("predict_proba")
697
-
698
- handler = ModelTransformerBuilder.build(
699
- dataset=dataset,
700
- estimator=self._sklearn_object,
701
- class_name="Pipeline",
702
- subproject="",
703
- autogenerated=False,
704
- )
705
- return handler.batch_inference(
706
- inference_method="predict_proba",
707
- input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
708
- expected_output_cols=expected_output_cols,
709
- session=dataset._session,
710
- dependencies=self._deps,
711
- )
712
- else:
713
- return self._invoke_estimator_func("predict_proba", dataset)
643
+ return self._invoke_estimator_func("predict_proba", dataset)
714
644
 
715
645
  @metaestimators.available_if(_final_step_has("predict_log_proba")) # type: ignore[misc]
716
646
  @telemetry.send_api_usage_telemetry(
@@ -729,31 +659,8 @@ class Pipeline(base.BaseTransformer):
729
659
 
730
660
  Returns:
731
661
  Output dataset.
732
-
733
- Raises:
734
- ValueError: An sklearn object has not been fit before calling this function
735
662
  """
736
- if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
737
- if self._sklearn_object is None:
738
- raise ValueError("Model must be fit before inference.")
739
-
740
- expected_output_cols = self._get_output_column_names("predict_log_proba")
741
- handler = ModelTransformerBuilder.build(
742
- dataset=dataset,
743
- estimator=self._sklearn_object,
744
- class_name="Pipeline",
745
- subproject="",
746
- autogenerated=False,
747
- )
748
- return handler.batch_inference(
749
- inference_method="predict_log_proba",
750
- input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
751
- expected_output_cols=expected_output_cols,
752
- session=dataset._session,
753
- dependencies=self._deps,
754
- )
755
- else:
756
- return self._invoke_estimator_func("predict_log_proba", dataset)
663
+ return self._invoke_estimator_func("predict_log_proba", dataset)
757
664
 
758
665
  @metaestimators.available_if(_final_step_has("score")) # type: ignore[misc]
759
666
  @telemetry.send_api_usage_telemetry(
@@ -769,30 +676,9 @@ class Pipeline(base.BaseTransformer):
769
676
 
770
677
  Returns:
771
678
  Output dataset.
772
-
773
- Raises:
774
- ValueError: An sklearn object has not been fit before calling this function
775
679
  """
776
680
 
777
- if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
778
- if self._sklearn_object is None:
779
- raise ValueError("Model must be fit before scoreing.")
780
- handler = ModelTransformerBuilder.build(
781
- dataset=dataset,
782
- estimator=self._sklearn_object,
783
- class_name="Pipeline",
784
- subproject="",
785
- autogenerated=False,
786
- )
787
- return handler.score(
788
- input_cols=self._infer_input_cols(),
789
- label_cols=self._get_label_cols(),
790
- session=dataset._session,
791
- dependencies=self._deps,
792
- score_sproc_imports=[],
793
- )
794
- else:
795
- return self._invoke_estimator_func("score", dataset)
681
+ return self._invoke_estimator_func("score", dataset)
796
682
 
797
683
  def _invoke_estimator_func(
798
684
  self, func_name: str, dataset: Union[snowpark.DataFrame, pd.DataFrame]
@@ -882,39 +768,6 @@ class Pipeline(base.BaseTransformer):
882
768
 
883
769
  return ct
884
770
 
885
- def _fit_ml_runtime(self, dataset: snowpark.DataFrame) -> None:
886
- """Train the pipeline in the ML Runtime.
887
-
888
- Args:
889
- dataset: The training Snowpark dataframe
890
-
891
- Raises:
892
- ModuleNotFoundError: The ML Runtime Client is not installed.
893
- """
894
- try:
895
- from snowflake.ml.runtime import MLRuntimeClient
896
- except ModuleNotFoundError as e:
897
- # The snowflake.ml.runtime module should always be present when
898
- # the env var IN_SPCS_ML_RUNTIME is present.
899
- raise ModuleNotFoundError("ML Runtime Python Client is not installed.") from e
900
-
901
- client = MLRuntimeClient()
902
- ml_runtime_compatible_pipeline = self._create_unfitted_sklearn_object()
903
-
904
- label_cols = self._get_label_cols()
905
- all_df_cols = dataset.columns
906
- input_cols = [col for col in all_df_cols if col not in label_cols]
907
-
908
- trained_pipeline = client.train(
909
- estimator=ml_runtime_compatible_pipeline,
910
- dataset=dataset,
911
- input_cols=input_cols,
912
- label_cols=label_cols,
913
- sample_weight_col=self.sample_weight_col,
914
- )
915
-
916
- self._sklearn_object = trained_pipeline
917
-
918
771
  def _get_label_cols(self) -> List[str]:
919
772
  """Util function to get the label columns from the pipeline.
920
773
  The label column is only present in the estimator
@@ -929,28 +782,6 @@ class Pipeline(base.BaseTransformer):
929
782
 
930
783
  return label_cols
931
784
 
932
- def _can_be_trained_in_ml_runtime(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> bool:
933
- """A utility function to determine if the pipeline cam be pushed down to the ML Runtime for training.
934
- Currently, this is true if:
935
- - The training dataset is a snowpark dataframe,
936
- - The IN_SPCS_ML_RUNTIME environment is present and
937
- - The pipeline can be converted to an sklearn pipeline.
938
-
939
- Args:
940
- dataset: The training dataset
941
-
942
- Returns:
943
- True if the dataset can be fit in the ml runtime, else false.
944
-
945
- """
946
- if not isinstance(dataset, snowpark.DataFrame):
947
- return False
948
-
949
- if not os.environ.get(IN_ML_RUNTIME_ENV_VAR):
950
- return False
951
-
952
- return self._is_convertible_to_sklearn
953
-
954
785
  @staticmethod
955
786
  def _wrap_transformer_in_column_transformer(
956
787
  transformer_name: str, transformer: base.BaseTransformer
@@ -1124,7 +955,6 @@ class Pipeline(base.BaseTransformer):
1124
955
 
1125
956
  telemetry_data = {
1126
957
  "pipeline_is_convertible_to_sklearn": self._is_convertible_to_sklearn,
1127
- "in_spcs_ml_runtime": bool(os.environ.get(IN_ML_RUNTIME_ENV_VAR)),
1128
958
  }
1129
959
  telemetry.send_custom_usage(
1130
960
  project=_PROJECT,