snowflake-ml-python 1.9.2__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/utils/service_logger.py +31 -17
- snowflake/ml/experiment/callback/keras.py +63 -0
- snowflake/ml/experiment/callback/lightgbm.py +59 -0
- snowflake/ml/experiment/callback/xgboost.py +67 -0
- snowflake/ml/experiment/utils.py +14 -0
- snowflake/ml/jobs/_utils/__init__.py +0 -0
- snowflake/ml/jobs/_utils/constants.py +4 -1
- snowflake/ml/jobs/_utils/payload_utils.py +55 -21
- snowflake/ml/jobs/_utils/query_helper.py +5 -1
- snowflake/ml/jobs/_utils/runtime_env_utils.py +63 -0
- snowflake/ml/jobs/_utils/scripts/get_instance_ip.py +2 -2
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +5 -5
- snowflake/ml/jobs/_utils/spec_utils.py +41 -8
- snowflake/ml/jobs/_utils/stage_utils.py +22 -9
- snowflake/ml/jobs/_utils/types.py +5 -7
- snowflake/ml/jobs/job.py +1 -1
- snowflake/ml/jobs/manager.py +1 -13
- snowflake/ml/model/_client/model/model_version_impl.py +219 -55
- snowflake/ml/model/_client/ops/service_ops.py +230 -30
- snowflake/ml/model/_client/service/model_deployment_spec.py +103 -27
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +11 -5
- snowflake/ml/model/_model_composer/model_composer.py +1 -70
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +2 -43
- snowflake/ml/model/event_handler.py +87 -18
- snowflake/ml/model/inference_engine.py +5 -0
- snowflake/ml/model/models/huggingface_pipeline.py +74 -51
- snowflake/ml/model/type_hints.py +26 -1
- snowflake/ml/registry/_manager/model_manager.py +37 -70
- snowflake/ml/registry/_manager/model_parameter_reconciler.py +294 -0
- snowflake/ml/registry/registry.py +0 -19
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.9.2.dist-info → snowflake_ml_python-1.11.0.dist-info}/METADATA +523 -491
- {snowflake_ml_python-1.9.2.dist-info → snowflake_ml_python-1.11.0.dist-info}/RECORD +36 -29
- snowflake/ml/experiment/callback.py +0 -121
- {snowflake_ml_python-1.9.2.dist-info → snowflake_ml_python-1.11.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.9.2.dist-info → snowflake_ml_python-1.11.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.9.2.dist-info → snowflake_ml_python-1.11.0.dist-info}/top_level.txt +0 -0
|
@@ -121,15 +121,28 @@ class StagePath:
|
|
|
121
121
|
return self._compose_path(self._path)
|
|
122
122
|
|
|
123
123
|
def joinpath(self, *args: Union[str, PathLike[str]]) -> "StagePath":
|
|
124
|
+
"""
|
|
125
|
+
Joins the given path arguments to the current path,
|
|
126
|
+
mimicking the behavior of pathlib.Path.joinpath.
|
|
127
|
+
If the argument is a stage path (i.e., an absolute path),
|
|
128
|
+
it overrides the current path and is returned as the final path.
|
|
129
|
+
If the argument is a normal path, it is joined with the current relative path
|
|
130
|
+
using self._path.joinpath(arg).
|
|
131
|
+
|
|
132
|
+
Args:
|
|
133
|
+
*args: Path components to join.
|
|
134
|
+
|
|
135
|
+
Returns:
|
|
136
|
+
A new StagePath with the joined path.
|
|
137
|
+
|
|
138
|
+
Raises:
|
|
139
|
+
NotImplementedError: the argument is a stage path.
|
|
140
|
+
"""
|
|
124
141
|
path = self
|
|
125
142
|
for arg in args:
|
|
126
|
-
|
|
143
|
+
if isinstance(arg, StagePath):
|
|
144
|
+
raise NotImplementedError
|
|
145
|
+
else:
|
|
146
|
+
# the arg might be an absolute path, so we need to remove the leading '/'
|
|
147
|
+
path = StagePath(f"{path.root}/{path._path.joinpath(arg).as_posix().lstrip('/')}")
|
|
127
148
|
return path
|
|
128
|
-
|
|
129
|
-
def _make_child(self, path: Union[str, PathLike[str]]) -> "StagePath":
|
|
130
|
-
stage_path = path if isinstance(path, StagePath) else StagePath(os.fspath(path))
|
|
131
|
-
if self.root == stage_path.root:
|
|
132
|
-
child_path = self._path.joinpath(stage_path._path)
|
|
133
|
-
return StagePath(self._compose_path(child_path))
|
|
134
|
-
else:
|
|
135
|
-
return stage_path
|
|
@@ -30,6 +30,10 @@ class PayloadPath(Protocol):
|
|
|
30
30
|
def parent(self) -> "PayloadPath":
|
|
31
31
|
...
|
|
32
32
|
|
|
33
|
+
@property
|
|
34
|
+
def root(self) -> str:
|
|
35
|
+
...
|
|
36
|
+
|
|
33
37
|
def exists(self) -> bool:
|
|
34
38
|
...
|
|
35
39
|
|
|
@@ -98,12 +102,6 @@ class ComputeResources:
|
|
|
98
102
|
|
|
99
103
|
@dataclass(frozen=True)
|
|
100
104
|
class ImageSpec:
|
|
101
|
-
repo: str
|
|
102
|
-
image_name: str
|
|
103
|
-
image_tag: str
|
|
104
105
|
resource_requests: ComputeResources
|
|
105
106
|
resource_limits: ComputeResources
|
|
106
|
-
|
|
107
|
-
@property
|
|
108
|
-
def full_name(self) -> str:
|
|
109
|
-
return f"{self.repo}/{self.image_name}:{self.image_tag}"
|
|
107
|
+
container_image: str
|
snowflake/ml/jobs/job.py
CHANGED
|
@@ -199,7 +199,7 @@ class MLJob(Generic[T], SerializableSessionMixin):
|
|
|
199
199
|
elapsed = time.monotonic() - start_time
|
|
200
200
|
if elapsed >= timeout >= 0:
|
|
201
201
|
raise TimeoutError(f"Job {self.name} did not complete within {timeout} seconds")
|
|
202
|
-
elif status == "PENDING" and not warning_shown and elapsed >=
|
|
202
|
+
elif status == "PENDING" and not warning_shown and elapsed >= 5: # Only show warning after 5s
|
|
203
203
|
pool_info = _get_compute_pool_info(self._session, self._compute_pool)
|
|
204
204
|
if (pool_info.max_nodes - pool_info.active_nodes) < self.min_instances:
|
|
205
205
|
logger.warning(
|
snowflake/ml/jobs/manager.py
CHANGED
|
@@ -426,7 +426,6 @@ def _submit_job(
|
|
|
426
426
|
|
|
427
427
|
Raises:
|
|
428
428
|
ValueError: If database or schema value(s) are invalid
|
|
429
|
-
SnowparkSQLException: If there is an error submitting the job.
|
|
430
429
|
"""
|
|
431
430
|
session = session or get_active_session()
|
|
432
431
|
|
|
@@ -504,18 +503,7 @@ def _submit_job(
|
|
|
504
503
|
query_text, params = _generate_submission_query(
|
|
505
504
|
spec, external_access_integrations, query_warehouse, target_instances, session, compute_pool, job_id
|
|
506
505
|
)
|
|
507
|
-
|
|
508
|
-
_ = query_helper.run_query(session, query_text, params=params)
|
|
509
|
-
except SnowparkSQLException as e:
|
|
510
|
-
if "Invalid spec: unknown option 'resourceManagement' for 'spec'." in e.message:
|
|
511
|
-
logger.warning("Dropping 'resourceManagement' from spec because control policy is not enabled.")
|
|
512
|
-
spec["spec"].pop("resourceManagement", None)
|
|
513
|
-
query_text, params = _generate_submission_query(
|
|
514
|
-
spec, external_access_integrations, query_warehouse, target_instances, session, compute_pool, job_id
|
|
515
|
-
)
|
|
516
|
-
_ = query_helper.run_query(session, query_text, params=params)
|
|
517
|
-
else:
|
|
518
|
-
raise
|
|
506
|
+
_ = query_helper.run_query(session, query_text, params=params)
|
|
519
507
|
return get_job(job_id, session=session)
|
|
520
508
|
|
|
521
509
|
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import enum
|
|
2
|
-
import logging
|
|
3
2
|
import pathlib
|
|
4
3
|
import tempfile
|
|
5
4
|
import warnings
|
|
@@ -708,6 +707,128 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
708
707
|
version_name=sql_identifier.SqlIdentifier(version),
|
|
709
708
|
)
|
|
710
709
|
|
|
710
|
+
def _get_inference_engine_args(
|
|
711
|
+
self, experimental_options: Optional[dict[str, Any]]
|
|
712
|
+
) -> Optional[service_ops.InferenceEngineArgs]:
|
|
713
|
+
|
|
714
|
+
if not experimental_options:
|
|
715
|
+
return None
|
|
716
|
+
|
|
717
|
+
if "inference_engine" not in experimental_options:
|
|
718
|
+
raise ValueError("inference_engine is required in experimental_options")
|
|
719
|
+
|
|
720
|
+
return service_ops.InferenceEngineArgs(
|
|
721
|
+
inference_engine=experimental_options["inference_engine"],
|
|
722
|
+
inference_engine_args_override=experimental_options.get("inference_engine_args_override"),
|
|
723
|
+
)
|
|
724
|
+
|
|
725
|
+
def _enrich_inference_engine_args(
|
|
726
|
+
self,
|
|
727
|
+
inference_engine_args: service_ops.InferenceEngineArgs,
|
|
728
|
+
gpu_requests: Optional[Union[str, int]] = None,
|
|
729
|
+
) -> Optional[service_ops.InferenceEngineArgs]:
|
|
730
|
+
"""Enrich inference engine args with model path and tensor parallelism settings.
|
|
731
|
+
|
|
732
|
+
Args:
|
|
733
|
+
inference_engine_args: The original inference engine args
|
|
734
|
+
gpu_requests: The number of GPUs requested
|
|
735
|
+
|
|
736
|
+
Returns:
|
|
737
|
+
Enriched inference engine args
|
|
738
|
+
|
|
739
|
+
Raises:
|
|
740
|
+
ValueError: Invalid gpu_requests
|
|
741
|
+
"""
|
|
742
|
+
if inference_engine_args.inference_engine_args_override is None:
|
|
743
|
+
inference_engine_args.inference_engine_args_override = []
|
|
744
|
+
|
|
745
|
+
# Get model stage path and strip off "snow://" prefix
|
|
746
|
+
model_stage_path = self._model_ops.get_model_version_stage_path(
|
|
747
|
+
database_name=None,
|
|
748
|
+
schema_name=None,
|
|
749
|
+
model_name=self._model_name,
|
|
750
|
+
version_name=self._version_name,
|
|
751
|
+
)
|
|
752
|
+
|
|
753
|
+
# Strip "snow://" prefix
|
|
754
|
+
if model_stage_path.startswith("snow://"):
|
|
755
|
+
model_stage_path = model_stage_path.replace("snow://", "", 1)
|
|
756
|
+
|
|
757
|
+
# Always overwrite the model key by appending
|
|
758
|
+
inference_engine_args.inference_engine_args_override.append(f"--model={model_stage_path}")
|
|
759
|
+
|
|
760
|
+
gpu_count = None
|
|
761
|
+
|
|
762
|
+
# Set tensor-parallelism if gpu_requests is specified
|
|
763
|
+
if gpu_requests is not None:
|
|
764
|
+
# assert gpu_requests is a string or an integer before casting to int
|
|
765
|
+
if isinstance(gpu_requests, str) or isinstance(gpu_requests, int):
|
|
766
|
+
try:
|
|
767
|
+
gpu_count = int(gpu_requests)
|
|
768
|
+
except ValueError:
|
|
769
|
+
raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
|
|
770
|
+
|
|
771
|
+
if gpu_count is not None:
|
|
772
|
+
if gpu_count > 0:
|
|
773
|
+
inference_engine_args.inference_engine_args_override.append(f"--tensor-parallel-size={gpu_count}")
|
|
774
|
+
else:
|
|
775
|
+
raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
|
|
776
|
+
|
|
777
|
+
return inference_engine_args
|
|
778
|
+
|
|
779
|
+
def _check_huggingface_text_generation_model(
|
|
780
|
+
self,
|
|
781
|
+
statement_params: Optional[dict[str, Any]] = None,
|
|
782
|
+
) -> None:
|
|
783
|
+
"""Check if the model is a HuggingFace pipeline with text-generation task.
|
|
784
|
+
|
|
785
|
+
Args:
|
|
786
|
+
statement_params: Optional dictionary of statement parameters to include
|
|
787
|
+
in the SQL command to fetch model spec.
|
|
788
|
+
|
|
789
|
+
Raises:
|
|
790
|
+
ValueError: If the model is not a HuggingFace text-generation model.
|
|
791
|
+
"""
|
|
792
|
+
# Fetch model spec
|
|
793
|
+
model_spec = self._model_ops._fetch_model_spec(
|
|
794
|
+
database_name=None,
|
|
795
|
+
schema_name=None,
|
|
796
|
+
model_name=self._model_name,
|
|
797
|
+
version_name=self._version_name,
|
|
798
|
+
statement_params=statement_params,
|
|
799
|
+
)
|
|
800
|
+
|
|
801
|
+
# Check if model_type is huggingface_pipeline
|
|
802
|
+
model_type = model_spec.get("model_type")
|
|
803
|
+
if model_type != "huggingface_pipeline":
|
|
804
|
+
raise ValueError(
|
|
805
|
+
f"Inference engine is only supported for HuggingFace text-generation models. "
|
|
806
|
+
f"Found model_type: {model_type}"
|
|
807
|
+
)
|
|
808
|
+
|
|
809
|
+
# Check if model supports text-generation task
|
|
810
|
+
# There should only be one model in the list because we don't support multiple models in a single model spec
|
|
811
|
+
models = model_spec.get("models", {})
|
|
812
|
+
is_text_generation = False
|
|
813
|
+
found_tasks: list[str] = []
|
|
814
|
+
|
|
815
|
+
# As long as the model supports text-generation task, we can use it
|
|
816
|
+
for _, model_info in models.items():
|
|
817
|
+
options = model_info.get("options", {})
|
|
818
|
+
task = options.get("task")
|
|
819
|
+
if task:
|
|
820
|
+
found_tasks.append(str(task))
|
|
821
|
+
if task == "text-generation":
|
|
822
|
+
is_text_generation = True
|
|
823
|
+
break
|
|
824
|
+
|
|
825
|
+
if not is_text_generation:
|
|
826
|
+
tasks_str = ", ".join(found_tasks)
|
|
827
|
+
found_tasks_str = (
|
|
828
|
+
f"Found task(s): {tasks_str} in model spec." if found_tasks else "No task found in model spec."
|
|
829
|
+
)
|
|
830
|
+
raise ValueError(f"Inference engine is only supported for task 'text-generation'. {found_tasks_str}")
|
|
831
|
+
|
|
711
832
|
@overload
|
|
712
833
|
def create_service(
|
|
713
834
|
self,
|
|
@@ -715,7 +836,7 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
715
836
|
service_name: str,
|
|
716
837
|
image_build_compute_pool: Optional[str] = None,
|
|
717
838
|
service_compute_pool: str,
|
|
718
|
-
image_repo: str,
|
|
839
|
+
image_repo: Optional[str] = None,
|
|
719
840
|
ingress_enabled: bool = False,
|
|
720
841
|
max_instances: int = 1,
|
|
721
842
|
cpu_requests: Optional[str] = None,
|
|
@@ -726,6 +847,7 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
726
847
|
force_rebuild: bool = False,
|
|
727
848
|
build_external_access_integration: Optional[str] = None,
|
|
728
849
|
block: bool = True,
|
|
850
|
+
experimental_options: Optional[dict[str, Any]] = None,
|
|
729
851
|
) -> Union[str, async_job.AsyncJob]:
|
|
730
852
|
"""Create an inference service with the given spec.
|
|
731
853
|
|
|
@@ -736,7 +858,8 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
736
858
|
the service compute pool if None.
|
|
737
859
|
service_compute_pool: The name of the compute pool used to run the inference service.
|
|
738
860
|
image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the database
|
|
739
|
-
or schema of the model will be used.
|
|
861
|
+
or schema of the model will be used. This can be None, in that case a default hidden image repository
|
|
862
|
+
will be used.
|
|
740
863
|
ingress_enabled: If true, creates an service endpoint associated with the service. User must have
|
|
741
864
|
BIND SERVICE ENDPOINT privilege on the account.
|
|
742
865
|
max_instances: The maximum number of inference service instances to run. The same value it set to
|
|
@@ -757,6 +880,10 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
757
880
|
block: A bool value indicating whether this function will wait until the service is available.
|
|
758
881
|
When it is ``False``, this function executes the underlying service creation asynchronously
|
|
759
882
|
and returns an :class:`AsyncJob`.
|
|
883
|
+
experimental_options: Experimental options for the service creation with custom inference engine.
|
|
884
|
+
Currently, only `inference_engine` and `inference_engine_args_override` are supported.
|
|
885
|
+
`inference_engine` is the name of the inference engine to use.
|
|
886
|
+
`inference_engine_args_override` is a list of string arguments to pass to the inference engine.
|
|
760
887
|
"""
|
|
761
888
|
...
|
|
762
889
|
|
|
@@ -767,7 +894,7 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
767
894
|
service_name: str,
|
|
768
895
|
image_build_compute_pool: Optional[str] = None,
|
|
769
896
|
service_compute_pool: str,
|
|
770
|
-
image_repo: str,
|
|
897
|
+
image_repo: Optional[str] = None,
|
|
771
898
|
ingress_enabled: bool = False,
|
|
772
899
|
max_instances: int = 1,
|
|
773
900
|
cpu_requests: Optional[str] = None,
|
|
@@ -778,6 +905,7 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
778
905
|
force_rebuild: bool = False,
|
|
779
906
|
build_external_access_integrations: Optional[list[str]] = None,
|
|
780
907
|
block: bool = True,
|
|
908
|
+
experimental_options: Optional[dict[str, Any]] = None,
|
|
781
909
|
) -> Union[str, async_job.AsyncJob]:
|
|
782
910
|
"""Create an inference service with the given spec.
|
|
783
911
|
|
|
@@ -788,7 +916,8 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
788
916
|
the service compute pool if None.
|
|
789
917
|
service_compute_pool: The name of the compute pool used to run the inference service.
|
|
790
918
|
image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the database
|
|
791
|
-
or schema of the model will be used.
|
|
919
|
+
or schema of the model will be used. This can be None, in that case a default hidden image repository
|
|
920
|
+
will be used.
|
|
792
921
|
ingress_enabled: If true, creates an service endpoint associated with the service. User must have
|
|
793
922
|
BIND SERVICE ENDPOINT privilege on the account.
|
|
794
923
|
max_instances: The maximum number of inference service instances to run. The same value it set to
|
|
@@ -809,6 +938,10 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
809
938
|
block: A bool value indicating whether this function will wait until the service is available.
|
|
810
939
|
When it is ``False``, this function executes the underlying service creation asynchronously
|
|
811
940
|
and returns an :class:`AsyncJob`.
|
|
941
|
+
experimental_options: Experimental options for the service creation with custom inference engine.
|
|
942
|
+
Currently, only `inference_engine` and `inference_engine_args_override` are supported.
|
|
943
|
+
`inference_engine` is the name of the inference engine to use.
|
|
944
|
+
`inference_engine_args_override` is a list of string arguments to pass to the inference engine.
|
|
812
945
|
"""
|
|
813
946
|
...
|
|
814
947
|
|
|
@@ -833,7 +966,7 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
833
966
|
service_name: str,
|
|
834
967
|
image_build_compute_pool: Optional[str] = None,
|
|
835
968
|
service_compute_pool: str,
|
|
836
|
-
image_repo: str,
|
|
969
|
+
image_repo: Optional[str] = None,
|
|
837
970
|
ingress_enabled: bool = False,
|
|
838
971
|
max_instances: int = 1,
|
|
839
972
|
cpu_requests: Optional[str] = None,
|
|
@@ -845,6 +978,7 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
845
978
|
build_external_access_integration: Optional[str] = None,
|
|
846
979
|
build_external_access_integrations: Optional[list[str]] = None,
|
|
847
980
|
block: bool = True,
|
|
981
|
+
experimental_options: Optional[dict[str, Any]] = None,
|
|
848
982
|
) -> Union[str, async_job.AsyncJob]:
|
|
849
983
|
"""Create an inference service with the given spec.
|
|
850
984
|
|
|
@@ -855,7 +989,8 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
855
989
|
the service compute pool if None.
|
|
856
990
|
service_compute_pool: The name of the compute pool used to run the inference service.
|
|
857
991
|
image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the database
|
|
858
|
-
or schema of the model will be used.
|
|
992
|
+
or schema of the model will be used. This can be None, in that case a default hidden image repository
|
|
993
|
+
will be used.
|
|
859
994
|
ingress_enabled: If true, creates an service endpoint associated with the service. User must have
|
|
860
995
|
BIND SERVICE ENDPOINT privilege on the account.
|
|
861
996
|
max_instances: The maximum number of inference service instances to run. The same value it set to
|
|
@@ -878,29 +1013,28 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
878
1013
|
block: A bool value indicating whether this function will wait until the service is available.
|
|
879
1014
|
When it is False, this function executes the underlying service creation asynchronously
|
|
880
1015
|
and returns an AsyncJob.
|
|
1016
|
+
experimental_options: Experimental options for the service creation with custom inference engine.
|
|
1017
|
+
Currently, only `inference_engine` and `inference_engine_args_override` are supported.
|
|
1018
|
+
`inference_engine` is the name of the inference engine to use.
|
|
1019
|
+
`inference_engine_args_override` is a list of string arguments to pass to the inference engine.
|
|
1020
|
+
|
|
881
1021
|
|
|
882
1022
|
Raises:
|
|
883
1023
|
ValueError: Illegal external access integration arguments.
|
|
1024
|
+
exceptions.SnowparkSQLException: if service already exists.
|
|
884
1025
|
|
|
885
1026
|
Returns:
|
|
886
1027
|
If `block=True`, return result information about service creation from server.
|
|
887
1028
|
Otherwise, return the service creation AsyncJob.
|
|
1029
|
+
|
|
1030
|
+
Raises:
|
|
1031
|
+
ValueError: Illegal external access integration arguments.
|
|
888
1032
|
"""
|
|
889
1033
|
statement_params = telemetry.get_statement_params(
|
|
890
1034
|
project=_TELEMETRY_PROJECT,
|
|
891
1035
|
subproject=_TELEMETRY_SUBPROJECT,
|
|
892
1036
|
)
|
|
893
1037
|
|
|
894
|
-
# Check root logger level and emit warning if needed
|
|
895
|
-
root_logger = logging.getLogger()
|
|
896
|
-
if root_logger.level in (logging.WARNING, logging.ERROR):
|
|
897
|
-
warnings.warn(
|
|
898
|
-
"Suppressing service logs. Set the log level to INFO if you would like "
|
|
899
|
-
"verbose service logs (e.g., logging.getLogger().setLevel(logging.INFO)).",
|
|
900
|
-
UserWarning,
|
|
901
|
-
stacklevel=2,
|
|
902
|
-
)
|
|
903
|
-
|
|
904
1038
|
if build_external_access_integration is not None:
|
|
905
1039
|
msg = (
|
|
906
1040
|
"`build_external_access_integration` is deprecated. "
|
|
@@ -916,41 +1050,74 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
916
1050
|
build_external_access_integrations = [build_external_access_integration]
|
|
917
1051
|
|
|
918
1052
|
service_db_id, service_schema_id, service_id = sql_identifier.parse_fully_qualified_name(service_name)
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
service_schema_name=service_schema_id,
|
|
927
|
-
service_name=service_id,
|
|
928
|
-
image_build_compute_pool_name=(
|
|
929
|
-
sql_identifier.SqlIdentifier(image_build_compute_pool)
|
|
930
|
-
if image_build_compute_pool
|
|
931
|
-
else sql_identifier.SqlIdentifier(service_compute_pool)
|
|
932
|
-
),
|
|
933
|
-
service_compute_pool_name=sql_identifier.SqlIdentifier(service_compute_pool),
|
|
934
|
-
image_repo_database_name=image_repo_db_id,
|
|
935
|
-
image_repo_schema_name=image_repo_schema_id,
|
|
936
|
-
image_repo_name=image_repo_id,
|
|
937
|
-
ingress_enabled=ingress_enabled,
|
|
938
|
-
max_instances=max_instances,
|
|
939
|
-
cpu_requests=cpu_requests,
|
|
940
|
-
memory_requests=memory_requests,
|
|
941
|
-
gpu_requests=gpu_requests,
|
|
942
|
-
num_workers=num_workers,
|
|
943
|
-
max_batch_rows=max_batch_rows,
|
|
944
|
-
force_rebuild=force_rebuild,
|
|
945
|
-
build_external_access_integrations=(
|
|
946
|
-
None
|
|
947
|
-
if build_external_access_integrations is None
|
|
948
|
-
else [sql_identifier.SqlIdentifier(eai) for eai in build_external_access_integrations]
|
|
949
|
-
),
|
|
950
|
-
block=block,
|
|
951
|
-
statement_params=statement_params,
|
|
1053
|
+
|
|
1054
|
+
# Check if model is HuggingFace text-generation before doing inference engine checks
|
|
1055
|
+
if experimental_options:
|
|
1056
|
+
self._check_huggingface_text_generation_model(statement_params)
|
|
1057
|
+
|
|
1058
|
+
inference_engine_args: Optional[service_ops.InferenceEngineArgs] = self._get_inference_engine_args(
|
|
1059
|
+
experimental_options
|
|
952
1060
|
)
|
|
953
1061
|
|
|
1062
|
+
# Enrich inference engine args if inference engine is specified
|
|
1063
|
+
if inference_engine_args is not None:
|
|
1064
|
+
inference_engine_args = self._enrich_inference_engine_args(inference_engine_args, gpu_requests)
|
|
1065
|
+
|
|
1066
|
+
from snowflake.ml.model import event_handler
|
|
1067
|
+
from snowflake.snowpark import exceptions
|
|
1068
|
+
|
|
1069
|
+
model_event_handler = event_handler.ModelEventHandler()
|
|
1070
|
+
|
|
1071
|
+
with model_event_handler.status("Creating model inference service", total=6, block=block) as status:
|
|
1072
|
+
try:
|
|
1073
|
+
result = self._service_ops.create_service(
|
|
1074
|
+
database_name=None,
|
|
1075
|
+
schema_name=None,
|
|
1076
|
+
model_name=self._model_name,
|
|
1077
|
+
version_name=self._version_name,
|
|
1078
|
+
service_database_name=service_db_id,
|
|
1079
|
+
service_schema_name=service_schema_id,
|
|
1080
|
+
service_name=service_id,
|
|
1081
|
+
image_build_compute_pool_name=(
|
|
1082
|
+
sql_identifier.SqlIdentifier(image_build_compute_pool)
|
|
1083
|
+
if image_build_compute_pool
|
|
1084
|
+
else sql_identifier.SqlIdentifier(service_compute_pool)
|
|
1085
|
+
),
|
|
1086
|
+
service_compute_pool_name=sql_identifier.SqlIdentifier(service_compute_pool),
|
|
1087
|
+
image_repo_name=image_repo,
|
|
1088
|
+
ingress_enabled=ingress_enabled,
|
|
1089
|
+
max_instances=max_instances,
|
|
1090
|
+
cpu_requests=cpu_requests,
|
|
1091
|
+
memory_requests=memory_requests,
|
|
1092
|
+
gpu_requests=gpu_requests,
|
|
1093
|
+
num_workers=num_workers,
|
|
1094
|
+
max_batch_rows=max_batch_rows,
|
|
1095
|
+
force_rebuild=force_rebuild,
|
|
1096
|
+
build_external_access_integrations=(
|
|
1097
|
+
None
|
|
1098
|
+
if build_external_access_integrations is None
|
|
1099
|
+
else [sql_identifier.SqlIdentifier(eai) for eai in build_external_access_integrations]
|
|
1100
|
+
),
|
|
1101
|
+
block=block,
|
|
1102
|
+
statement_params=statement_params,
|
|
1103
|
+
progress_status=status,
|
|
1104
|
+
inference_engine_args=inference_engine_args,
|
|
1105
|
+
)
|
|
1106
|
+
status.update(label="Model service created successfully", state="complete", expanded=False)
|
|
1107
|
+
return result
|
|
1108
|
+
except exceptions.SnowparkSQLException as e:
|
|
1109
|
+
# Check if the error is because the service already exists
|
|
1110
|
+
if "already exists" in str(e).lower() or "100132" in str(
|
|
1111
|
+
e
|
|
1112
|
+
): # 100132 is Snowflake error code for object already exists
|
|
1113
|
+
status.update("service already exists")
|
|
1114
|
+
status.complete()
|
|
1115
|
+
status.update(label="Service already exists", state="error", expanded=False)
|
|
1116
|
+
raise
|
|
1117
|
+
else:
|
|
1118
|
+
status.update(label="Service creation failed", state="error", expanded=False)
|
|
1119
|
+
raise
|
|
1120
|
+
|
|
954
1121
|
@telemetry.send_api_usage_telemetry(
|
|
955
1122
|
project=_TELEMETRY_PROJECT,
|
|
956
1123
|
subproject=_TELEMETRY_SUBPROJECT,
|
|
@@ -1028,7 +1195,7 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
1028
1195
|
*,
|
|
1029
1196
|
job_name: str,
|
|
1030
1197
|
compute_pool: str,
|
|
1031
|
-
image_repo: str,
|
|
1198
|
+
image_repo: Optional[str] = None,
|
|
1032
1199
|
output_table_name: str,
|
|
1033
1200
|
function_name: Optional[str] = None,
|
|
1034
1201
|
cpu_requests: Optional[str] = None,
|
|
@@ -1045,7 +1212,6 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
1045
1212
|
)
|
|
1046
1213
|
target_function_info = self._get_function_info(function_name=function_name)
|
|
1047
1214
|
job_db_id, job_schema_id, job_id = sql_identifier.parse_fully_qualified_name(job_name)
|
|
1048
|
-
image_repo_db_id, image_repo_schema_id, image_repo_id = sql_identifier.parse_fully_qualified_name(image_repo)
|
|
1049
1215
|
output_table_db_id, output_table_schema_id, output_table_id = sql_identifier.parse_fully_qualified_name(
|
|
1050
1216
|
output_table_name
|
|
1051
1217
|
)
|
|
@@ -1064,9 +1230,7 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
1064
1230
|
job_name=job_id,
|
|
1065
1231
|
compute_pool_name=sql_identifier.SqlIdentifier(compute_pool),
|
|
1066
1232
|
warehouse_name=sql_identifier.SqlIdentifier(warehouse),
|
|
1067
|
-
|
|
1068
|
-
image_repo_schema_name=image_repo_schema_id,
|
|
1069
|
-
image_repo_name=image_repo_id,
|
|
1233
|
+
image_repo_name=image_repo,
|
|
1070
1234
|
output_table_database_name=output_table_db_id,
|
|
1071
1235
|
output_table_schema_name=output_table_schema_id,
|
|
1072
1236
|
output_table_name=output_table_id,
|