snowflake-ml-python 1.13.0__py3-none-any.whl → 1.15.0__py3-none-any.whl
This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions exactly as they appear in their respective public registries.
- snowflake/ml/_internal/platform_capabilities.py +9 -7
- snowflake/ml/_internal/utils/connection_params.py +5 -3
- snowflake/ml/_internal/utils/jwt_generator.py +3 -2
- snowflake/ml/_internal/utils/temp_file_utils.py +1 -2
- snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +16 -3
- snowflake/ml/experiment/_entities/__init__.py +2 -1
- snowflake/ml/experiment/_entities/run.py +0 -15
- snowflake/ml/experiment/_entities/run_metadata.py +3 -51
- snowflake/ml/experiment/experiment_tracking.py +8 -8
- snowflake/ml/jobs/_utils/constants.py +1 -1
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +9 -7
- snowflake/ml/jobs/job.py +12 -4
- snowflake/ml/jobs/manager.py +34 -7
- snowflake/ml/lineage/lineage_node.py +0 -1
- snowflake/ml/model/__init__.py +2 -6
- snowflake/ml/model/_client/model/batch_inference_specs.py +0 -4
- snowflake/ml/model/_client/model/inference_engine_utils.py +55 -0
- snowflake/ml/model/_client/model/model_version_impl.py +25 -77
- snowflake/ml/model/_client/ops/model_ops.py +9 -2
- snowflake/ml/model/_client/ops/service_ops.py +82 -36
- snowflake/ml/model/_client/sql/service.py +29 -5
- snowflake/ml/model/_packager/model_handlers/_utils.py +4 -2
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +7 -5
- snowflake/ml/model/_packager/model_packager.py +4 -3
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +0 -1
- snowflake/ml/model/_signatures/utils.py +0 -21
- snowflake/ml/model/models/huggingface_pipeline.py +56 -21
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +47 -3
- snowflake/ml/monitoring/_manager/model_monitor_manager.py +3 -0
- snowflake/ml/monitoring/entities/model_monitor_config.py +3 -0
- snowflake/ml/monitoring/model_monitor.py +30 -0
- snowflake/ml/registry/_manager/model_manager.py +1 -1
- snowflake/ml/registry/_manager/model_parameter_reconciler.py +2 -2
- snowflake/ml/utils/connection_params.py +5 -3
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.13.0.dist-info → snowflake_ml_python-1.15.0.dist-info}/METADATA +51 -34
- {snowflake_ml_python-1.13.0.dist-info → snowflake_ml_python-1.15.0.dist-info}/RECORD +40 -39
- {snowflake_ml_python-1.13.0.dist-info → snowflake_ml_python-1.15.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.13.0.dist-info → snowflake_ml_python-1.15.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.13.0.dist-info → snowflake_ml_python-1.15.0.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,10 @@ from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.utils import sql_identifier
 from snowflake.ml.lineage import lineage_node
 from snowflake.ml.model import task, type_hints
-from snowflake.ml.model._client.model import
+from snowflake.ml.model._client.model import (
+    batch_inference_specs,
+    inference_engine_utils,
+)
 from snowflake.ml.model._client.ops import metadata_ops, model_ops, service_ops
 from snowflake.ml.model._model_composer import model_composer
 from snowflake.ml.model._model_composer.model_manifest import model_manifest_schema
@@ -22,6 +25,7 @@ from snowflake.snowpark import Session, async_job, dataframe
 _TELEMETRY_PROJECT = "MLOps"
 _TELEMETRY_SUBPROJECT = "ModelManagement"
 _BATCH_INFERENCE_JOB_ID_PREFIX = "BATCH_INFERENCE_"
+_BATCH_INFERENCE_TEMPORARY_FOLDER = "_temporary"


 class ExportMode(enum.Enum):
@@ -553,7 +557,7 @@ class ModelVersion(lineage_node.LineageNode):
         self,
         *,
         compute_pool: str,
-        input_spec:
+        input_spec: dataframe.DataFrame,
         output_spec: batch_inference_specs.OutputSpec,
         job_spec: Optional[batch_inference_specs.JobSpec] = None,
     ) -> jobs.MLJob[Any]:
@@ -22,6 +25,7 @@ from snowflake.snowpark import Session, async_job, dataframe
|
|
|
22
25
|
_TELEMETRY_PROJECT = "MLOps"
|
|
23
26
|
_TELEMETRY_SUBPROJECT = "ModelManagement"
|
|
24
27
|
_BATCH_INFERENCE_JOB_ID_PREFIX = "BATCH_INFERENCE_"
|
|
28
|
+
_BATCH_INFERENCE_TEMPORARY_FOLDER = "_temporary"
|
|
25
29
|
|
|
26
30
|
|
|
27
31
|
class ExportMode(enum.Enum):
|
|
@@ -553,7 +557,7 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
553
557
|
self,
|
|
554
558
|
*,
|
|
555
559
|
compute_pool: str,
|
|
556
|
-
input_spec:
|
|
560
|
+
input_spec: dataframe.DataFrame,
|
|
557
561
|
output_spec: batch_inference_specs.OutputSpec,
|
|
558
562
|
job_spec: Optional[batch_inference_specs.JobSpec] = None,
|
|
559
563
|
) -> jobs.MLJob[Any]:
|
|
@@ -569,6 +573,18 @@ class ModelVersion(lineage_node.LineageNode):
         if warehouse is None:
             raise ValueError("Warehouse is not set. Please set the warehouse field in the JobSpec.")

+        # use a temporary folder in the output stage to store the intermediate output from the dataframe
+        output_stage_location = output_spec.stage_location
+        if not output_stage_location.endswith("/"):
+            output_stage_location += "/"
+        input_stage_location = f"{output_stage_location}{_BATCH_INFERENCE_TEMPORARY_FOLDER}/"
+
+        try:
+            input_spec.write.copy_into_location(location=input_stage_location, file_format_type="parquet", header=True)
+        # todo: be specific about the type of errors to provide better error messages.
+        except Exception as e:
+            raise RuntimeError(f"Failed to process input_spec: {e}")
+
         if job_spec.job_name is None:
             # Same as the MLJob ID generation logic with a different prefix
             job_name = f"{_BATCH_INFERENCE_JOB_ID_PREFIX}{str(uuid.uuid4()).replace('-', '_').upper()}"
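For orientation, the sketch below shows how a caller-side batch inference request interacts with the staging behavior added above: the Snowpark input DataFrame is written as Parquet into a `_temporary/` subfolder of the output stage before the job is launched. It assumes an existing `ModelVersion` instance `mv` and Snowpark `session`; the entry-point name `run_batch_inference` is a hypothetical placeholder, while the keyword arguments and the `warehouse`/`stage_location` fields come from the signatures visible in this diff.

```python
from snowflake.ml.model._client.model import batch_inference_specs

# Hypothetical caller-side sketch; `run_batch_inference` is an assumed method name.
output_spec = batch_inference_specs.OutputSpec(stage_location="@my_db.my_schema.results_stage/run1")
job_spec = batch_inference_specs.JobSpec(warehouse="MY_WH")  # warehouse is required per the check above

job = mv.run_batch_inference(
    compute_pool="MY_COMPUTE_POOL",
    input_spec=session.table("MY_DB.MY_SCHEMA.SCORING_INPUT"),  # snowpark DataFrame
    output_spec=output_spec,
    job_spec=job_spec,  # a BATCH_INFERENCE_-prefixed job name is generated when job_name is omitted
)
# Per the change above, the input DataFrame is first copied as Parquet to
# "@my_db.my_schema.results_stage/run1/_temporary/" and the job reads it with file pattern "*".
```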
@@ -592,9 +608,9 @@ class ModelVersion(lineage_node.LineageNode):
             job_name=job_name,
             replicas=job_spec.replicas,
             # input and output
-            input_stage_location=
+            input_stage_location=input_stage_location,
             input_file_pattern="*",
-            output_stage_location=
+            output_stage_location=output_stage_location,
             completion_filename="_SUCCESS",
             # misc
             statement_params=statement_params,
@@ -768,75 +784,6 @@ class ModelVersion(lineage_node.LineageNode):
             version_name=sql_identifier.SqlIdentifier(version),
         )

-    def _get_inference_engine_args(
-        self, experimental_options: Optional[dict[str, Any]]
-    ) -> Optional[service_ops.InferenceEngineArgs]:
-
-        if not experimental_options:
-            return None
-
-        if "inference_engine" not in experimental_options:
-            raise ValueError("inference_engine is required in experimental_options")
-
-        return service_ops.InferenceEngineArgs(
-            inference_engine=experimental_options["inference_engine"],
-            inference_engine_args_override=experimental_options.get("inference_engine_args_override"),
-        )
-
-    def _enrich_inference_engine_args(
-        self,
-        inference_engine_args: service_ops.InferenceEngineArgs,
-        gpu_requests: Optional[Union[str, int]] = None,
-    ) -> Optional[service_ops.InferenceEngineArgs]:
-        """Enrich inference engine args with model path and tensor parallelism settings.
-
-        Args:
-            inference_engine_args: The original inference engine args
-            gpu_requests: The number of GPUs requested
-
-        Returns:
-            Enriched inference engine args
-
-        Raises:
-            ValueError: Invalid gpu_requests
-        """
-        if inference_engine_args.inference_engine_args_override is None:
-            inference_engine_args.inference_engine_args_override = []
-
-        # Get model stage path and strip off "snow://" prefix
-        model_stage_path = self._model_ops.get_model_version_stage_path(
-            database_name=None,
-            schema_name=None,
-            model_name=self._model_name,
-            version_name=self._version_name,
-        )
-
-        # Strip "snow://" prefix
-        if model_stage_path.startswith("snow://"):
-            model_stage_path = model_stage_path.replace("snow://", "", 1)
-
-        # Always overwrite the model key by appending
-        inference_engine_args.inference_engine_args_override.append(f"--model={model_stage_path}")
-
-        gpu_count = None
-
-        # Set tensor-parallelism if gpu_requests is specified
-        if gpu_requests is not None:
-            # assert gpu_requests is a string or an integer before casting to int
-            if isinstance(gpu_requests, str) or isinstance(gpu_requests, int):
-                try:
-                    gpu_count = int(gpu_requests)
-                except ValueError:
-                    raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
-
-        if gpu_count is not None:
-            if gpu_count > 0:
-                inference_engine_args.inference_engine_args_override.append(f"--tensor-parallel-size={gpu_count}")
-            else:
-                raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
-
-        return inference_engine_args
-
     def _check_huggingface_text_generation_model(
         self,
         statement_params: Optional[dict[str, Any]] = None,
@@ -1116,13 +1063,14 @@ class ModelVersion(lineage_node.LineageNode):
         if experimental_options:
             self._check_huggingface_text_generation_model(statement_params)

-        inference_engine_args
-            experimental_options
-        )
+        inference_engine_args = inference_engine_utils._get_inference_engine_args(experimental_options)

         # Enrich inference engine args if inference engine is specified
         if inference_engine_args is not None:
-            inference_engine_args =
+            inference_engine_args = inference_engine_utils._enrich_inference_engine_args(
+                inference_engine_args,
+                gpu_requests,
+            )

         from snowflake.ml.model import event_handler
         from snowflake.snowpark import exceptions
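The two helpers deleted above are not gone: the new `snowflake/ml/model/_client/model/inference_engine_utils.py` module (+55 lines in the file list) takes them over, and the call site now goes through `inference_engine_utils._get_inference_engine_args` and `_enrich_inference_engine_args`. The condensed sketch below restates the enrichment behavior from the removed code; the standalone function shape and names are illustrative, not the new module's actual API.

```python
from typing import Optional, Union

def enrich_engine_args(
    args_override: Optional[list[str]],
    model_stage_path: str,
    gpu_requests: Optional[Union[str, int]] = None,
) -> list[str]:
    """Illustrative restatement of the removed _enrich_inference_engine_args logic."""
    args = list(args_override or [])
    # The model path always wins: strip the "snow://" scheme and append --model.
    if model_stage_path.startswith("snow://"):
        model_stage_path = model_stage_path.replace("snow://", "", 1)
    args.append(f"--model={model_stage_path}")
    # gpu_requests (str or int) maps to --tensor-parallel-size when it parses to a positive int.
    if gpu_requests is not None:
        try:
            gpu_count = int(gpu_requests)
        except ValueError:
            raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
        if gpu_count <= 0:
            raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
        args.append(f"--tensor-parallel-size={gpu_count}")
    return args
```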
@@ -47,7 +47,8 @@ class ServiceInfo(TypedDict):
 class ModelOperator:
     INFERENCE_SERVICE_ENDPOINT_NAME = "inference"
     INGRESS_ENDPOINT_URL_SUFFIX = "snowflakecomputing.app"
-
+    # app-service-privatelink might not contain "snowflakecomputing" in the url - using the minimum required substring
+    PRIVATELINK_INGRESS_ENDPOINT_URL_SUBSTRING = "privatelink.snowflake"

     def __init__(
         self,
@@ -631,7 +632,13 @@ class ModelOperator:

     def _extract_and_validate_privatelink_url(self, res_row: "row.Row") -> Optional[str]:
         """Extract and validate privatelink ingress URL from endpoint row."""
-
+        # Check if the privatelink_ingress_url column exists
+        col_name = self._service_client.MODEL_INFERENCE_SERVICE_ENDPOINT_PRIVATELINK_INGRESS_URL_COL_NAME
+        if col_name not in res_row:
+            # Column doesn't exist in query result for non-Business Critical accounts
+            return None
+
+        url_value = res_row[col_name]
         if url_value is None:
             return None
         url_str = str(url_value)
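Together with the `PRIVATELINK_INGRESS_ENDPOINT_URL_SUBSTRING` constant added above, the extraction now degrades gracefully on accounts whose endpoint query result has no privatelink column (the comment calls out non-Business Critical accounts). Below is a minimal sketch of the defensive lookup, using a plain dict in place of a Snowpark `Row`; the final substring check is an assumption based on the constant's name, since the rest of the validation body is not shown in this hunk.

```python
from typing import Optional

PRIVATELINK_SUBSTRING = "privatelink.snowflake"  # mirrors the new ModelOperator constant

def extract_privatelink_url(row: dict, col_name: str = "privatelink_ingress_url") -> Optional[str]:
    # The column may be entirely absent when the account does not expose privatelink endpoints.
    if col_name not in row:
        return None
    url_value = row[col_name]
    if url_value is None:
        return None
    url_str = str(url_value)
    # Hedged guess at the validation: accept the URL only if it looks like a privatelink hostname.
    return url_str if PRIVATELINK_SUBSTRING in url_str else None
```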
@@ -155,7 +155,8 @@ class ServiceOperator:
             database_name=database_name,
             schema_name=schema_name,
         )
-
+        self._use_inlined_deployment_spec = pc.PlatformCapabilities.get_instance().is_inlined_deployment_spec_enabled()
+        if self._use_inlined_deployment_spec:
             self._workspace = None
             self._model_deployment_spec = model_deployment_spec.ModelDeploymentSpec()
         else:
@@ -264,7 +265,14 @@ class ServiceOperator:
             self._model_deployment_spec.add_hf_logger_spec(
                 hf_model_name=hf_model_args.hf_model_name,
                 hf_task=hf_model_args.hf_task,
-                hf_token=
+                hf_token=(
+                    # when using inlined deployment spec, we need to use QMARK_RESERVED_TOKEN
+                    # to avoid revealing the token while calling the SYSTEM$DEPLOY_MODEL function
+                    # noop if using file-based deployment spec or token is not provided
+                    service_sql.QMARK_RESERVED_TOKEN
+                    if hf_model_args.hf_token and self._use_inlined_deployment_spec
+                    else hf_model_args.hf_token
+                ),
                 hf_tokenizer=hf_model_args.hf_tokenizer,
                 hf_revision=hf_model_args.hf_revision,
                 hf_trust_remote_code=hf_model_args.hf_trust_remote_code,
@@ -320,20 +328,31 @@ class ServiceOperator:
                 model_deployment_spec.ModelDeploymentSpec.DEPLOY_SPEC_FILE_REL_PATH if self._workspace else None
             ),
             model_deployment_spec_yaml_str=None if self._workspace else spec_yaml_str_or_path,
+            query_params=(
+                # when using inlined deployment spec, we need to add the token to the query params
+                # to avoid revealing the token while calling the SYSTEM$DEPLOY_MODEL function
+                # noop if using file-based deployment spec or token is not provided
+                [hf_model_args.hf_token]
+                if (self._use_inlined_deployment_spec and hf_model_args and hf_model_args.hf_token)
+                else []
+            ),
             statement_params=statement_params,
         )

-
-
-
-
-
-
-
-
-
-
-
+        model_build_service: Optional[ServiceLogInfo] = None
+        if is_enable_image_build:
+            # stream service logs in a thread
+            model_build_service_name = sql_identifier.SqlIdentifier(
+                self._get_service_id_from_deployment_step(query_id, DeploymentStep.MODEL_BUILD)
+            )
+            model_build_service = ServiceLogInfo(
+                database_name=service_database_name,
+                schema_name=service_schema_name,
+                service_name=model_build_service_name,
+                deployment_step=DeploymentStep.MODEL_BUILD,
+                log_color=service_logger.LogColor.GREEN,
+            )
+
         model_inference_service = ServiceLogInfo(
             database_name=service_database_name,
             schema_name=service_schema_name,
@@ -375,7 +394,7 @@ class ServiceOperator:
         progress_status.increment()

         # Poll for model build to start if not using existing service
-        if not model_inference_service_exists:
+        if not model_inference_service_exists and model_build_service:
             self._wait_for_service_status(
                 model_build_service_name,
                 service_sql.ServiceStatus.RUNNING,
@@ -390,7 +409,7 @@ class ServiceOperator:
         progress_status.increment()

         # Poll for model build completion
-        if not model_inference_service_exists:
+        if not model_inference_service_exists and model_build_service:
             self._wait_for_service_status(
                 model_build_service_name,
                 service_sql.ServiceStatus.DONE,
@@ -454,7 +473,7 @@ class ServiceOperator:
         self,
         async_job: snowpark.AsyncJob,
         model_logger_service: Optional[ServiceLogInfo],
-        model_build_service: ServiceLogInfo,
+        model_build_service: Optional[ServiceLogInfo],
         model_inference_service: ServiceLogInfo,
         model_inference_service_exists: bool,
         force_rebuild: bool,
@@ -483,7 +502,7 @@ class ServiceOperator:
         self,
         force_rebuild: bool,
         service_log_meta: ServiceLogMetadata,
-        model_build_service: ServiceLogInfo,
+        model_build_service: Optional[ServiceLogInfo],
         model_inference_service: ServiceLogInfo,
         operation_id: str,
         statement_params: Optional[dict[str, Any]] = None,
@@ -599,13 +618,24 @@ class ServiceOperator:
         # check if model logger service is done
         # and transition the service log metadata to the model image build service
         if service.deployment_step == DeploymentStep.MODEL_LOGGING:
-
-
-
-
-
-
-
+            if model_build_service:
+                # building the inference image, transition to the model build service
+                service_log_meta.transition_service_log_metadata(
+                    model_build_service,
+                    f"Model Logger service {service.display_service_name} complete.",
+                    is_model_build_service_done=False,
+                    is_model_logger_service_done=service_log_meta.is_model_logger_service_done,
+                    operation_id=operation_id,
+                )
+            else:
+                # no model build service, transition to the model inference service
+                service_log_meta.transition_service_log_metadata(
+                    model_inference_service,
+                    f"Model Logger service {service.display_service_name} complete.",
+                    is_model_build_service_done=True,
+                    is_model_logger_service_done=service_log_meta.is_model_logger_service_done,
+                    operation_id=operation_id,
+                )
         # check if model build service is done
         # and transition the service log metadata to the model inference service
         elif service.deployment_step == DeploymentStep.MODEL_BUILD:
@@ -616,6 +646,8 @@ class ServiceOperator:
                 is_model_logger_service_done=service_log_meta.is_model_logger_service_done,
                 operation_id=operation_id,
             )
+        elif service.deployment_step == DeploymentStep.MODEL_INFERENCE:
+            module_logger.info(f"Inference service {service.display_service_name} is deployed.")
         else:
             module_logger.warning(f"Service {service.display_service_name} is done, but not transitioning.")

@@ -623,7 +655,7 @@ class ServiceOperator:
         self,
         async_job: snowpark.AsyncJob,
         model_logger_service: Optional[ServiceLogInfo],
-        model_build_service: ServiceLogInfo,
+        model_build_service: Optional[ServiceLogInfo],
         model_inference_service: ServiceLogInfo,
         model_inference_service_exists: bool,
         force_rebuild: bool,
@@ -632,14 +664,23 @@ class ServiceOperator:
     ) -> None:
         """Stream service logs while the async job is running."""

-
-
-
-
-
-
-
-
+        if model_build_service:
+            model_build_service_logger = service_logger.get_logger(
+                model_build_service.display_service_name,  # BuildJobName
+                model_build_service.log_color,
+                operation_id=operation_id,
+            )
+            service_log_meta = ServiceLogMetadata(
+                service_logger=model_build_service_logger,
+                service=model_build_service,
+                service_status=None,
+                is_model_build_service_done=False,
+                is_model_logger_service_done=True,
+                log_offset=0,
+            )
+        elif model_logger_service:
+            model_logger_service_logger = service_logger.get_logger(
+                model_logger_service.display_service_name,  # ModelLoggerName
                 model_logger_service.log_color,
                 operation_id=operation_id,
             )
@@ -653,12 +694,17 @@ class ServiceOperator:
                 log_offset=0,
             )
         else:
+            model_inference_service_logger = service_logger.get_logger(
+                model_inference_service.display_service_name,  # ModelInferenceName
+                model_inference_service.log_color,
+                operation_id=operation_id,
+            )
             service_log_meta = ServiceLogMetadata(
-                service_logger=
-                service=
+                service_logger=model_inference_service_logger,
+                service=model_inference_service,
                 service_status=None,
                 is_model_build_service_done=False,
-                is_model_logger_service_done=
+                is_model_logger_service_done=False,
                 log_offset=0,
             )

@@ -1,8 +1,9 @@
+import contextlib
 import dataclasses
 import enum
 import logging
 import textwrap
-from typing import Any, Optional
+from typing import Any, Generator, Optional

 from snowflake import snowpark
 from snowflake.ml._internal.utils import (
@@ -17,6 +18,11 @@ from snowflake.snowpark._internal import utils as snowpark_utils

 logger = logging.getLogger(__name__)

+# Using this token instead of '?' to avoid escaping issues
+# After quotes are escaped, we replace this token with '|| ? ||'
+QMARK_RESERVED_TOKEN = "<QMARK_RESERVED_TOKEN>"
+QMARK_PARAMETER_TOKEN = "'|| ? ||'"
+

 class ServiceStatus(enum.Enum):
     PENDING = "PENDING"
@@ -70,12 +76,26 @@ class ServiceSQLClient(_base._BaseSQLClient):
     CONTAINER_STATUS = "status"
     MESSAGE = "message"

+    @contextlib.contextmanager
+    def _qmark_paramstyle(self) -> Generator[None, None, None]:
+        """Context manager that temporarily changes paramstyle to qmark and restores original value on exit."""
+        if not hasattr(self._session, "_options"):
+            yield
+        else:
+            original_paramstyle = self._session._options["paramstyle"]
+            try:
+                self._session._options["paramstyle"] = "qmark"
+                yield
+            finally:
+                self._session._options["paramstyle"] = original_paramstyle
+
     def deploy_model(
         self,
         *,
         stage_path: Optional[str] = None,
         model_deployment_spec_yaml_str: Optional[str] = None,
         model_deployment_spec_file_rel_path: Optional[str] = None,
+        query_params: Optional[list[Any]] = None,
         statement_params: Optional[dict[str, Any]] = None,
     ) -> tuple[str, snowpark.AsyncJob]:
         assert model_deployment_spec_yaml_str or model_deployment_spec_file_rel_path
@@ -83,11 +103,18 @@ class ServiceSQLClient(_base._BaseSQLClient):
             model_deployment_spec_yaml_str = snowpark_utils.escape_single_quotes(
                 model_deployment_spec_yaml_str
             )  # type: ignore[no-untyped-call]
+            model_deployment_spec_yaml_str = model_deployment_spec_yaml_str.replace(  # type: ignore[union-attr]
+                QMARK_RESERVED_TOKEN, QMARK_PARAMETER_TOKEN
+            )
             logger.info(f"Deploying model with spec={model_deployment_spec_yaml_str}")
             sql_str = f"CALL SYSTEM$DEPLOY_MODEL('{model_deployment_spec_yaml_str}')"
         else:
             sql_str = f"CALL SYSTEM$DEPLOY_MODEL('@{stage_path}/{model_deployment_spec_file_rel_path}')"
-
+        with self._qmark_paramstyle():
+            async_job = self._session.sql(
+                sql_str,
+                params=query_params if query_params else None,
+            ).collect(block=False, statement_params=statement_params)
         assert isinstance(async_job, snowpark.AsyncJob)
         return async_job.query_id, async_job

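The `ServiceOperator` hunks above and the `ServiceSQLClient` changes here form one mechanism: the deployment-spec YAML carries the `QMARK_RESERVED_TOKEN` placeholder instead of the Hugging Face token, the placeholder is rewritten to `'|| ? ||'` after quote escaping, and the real token travels as a qmark bind parameter, so it never appears in the SQL text or query history. A standalone sketch of that substitution with made-up values (the quote escaping below only approximates `snowpark_utils.escape_single_quotes`):

```python
QMARK_RESERVED_TOKEN = "<QMARK_RESERVED_TOKEN>"
QMARK_PARAMETER_TOKEN = "'|| ? ||'"

spec_yaml = "hf_token: <QMARK_RESERVED_TOKEN>"  # illustrative one-line spec
hf_token = "hf_secret_value"                    # never interpolated into the SQL string

escaped = spec_yaml.replace("'", "\\'")  # rough stand-in for escape_single_quotes
escaped = escaped.replace(QMARK_RESERVED_TOKEN, QMARK_PARAMETER_TOKEN)
sql_str = f"CALL SYSTEM$DEPLOY_MODEL('{escaped}')"
print(sql_str)
# CALL SYSTEM$DEPLOY_MODEL('hf_token: '|| ? ||'')
# session.sql(sql_str, params=[hf_token]) would then bind the token server-side,
# which is why deploy_model wraps the call in the _qmark_paramstyle() context manager.
```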
@@ -256,9 +283,6 @@ class ServiceSQLClient(_base._BaseSQLClient):
             )
             .has_column(ServiceSQLClient.MODEL_INFERENCE_SERVICE_ENDPOINT_NAME_COL_NAME, allow_empty=True)
             .has_column(ServiceSQLClient.MODEL_INFERENCE_SERVICE_ENDPOINT_INGRESS_URL_COL_NAME, allow_empty=True)
-            .has_column(
-                ServiceSQLClient.MODEL_INFERENCE_SERVICE_ENDPOINT_PRIVATELINK_INGRESS_URL_COL_NAME, allow_empty=True
-            )
         )

         return res.validate()
@@ -1,5 +1,6 @@
 import importlib
 import json
+import logging
 import os
 import pathlib
 import warnings
@@ -8,7 +9,6 @@ from typing import Any, Callable, Iterable, Optional, Sequence, cast
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
-from absl import logging

 import snowflake.snowpark.dataframe as sp_df
 from snowflake.ml._internal import env
@@ -23,6 +23,8 @@ from snowflake.ml.model._signatures import (
 )
 from snowflake.snowpark import DataFrame as SnowparkDataFrame

+logger = logging.getLogger(__name__)
+
 EXPLAIN_BACKGROUND_DATA_ROWS_COUNT_LIMIT = 1000


@@ -257,7 +259,7 @@ def validate_model_task(passed_model_task: model_types.Task, inferred_model_task
         )
         return inferred_model_task
     elif inferred_model_task != model_types.Task.UNKNOWN:
-
+        logger.info(f"Inferred Task: {inferred_model_task.name} is used as task for this model " f"version")
         return inferred_model_task
     return passed_model_task

@@ -43,7 +43,6 @@ DEFAULT_CHAT_TEMPLATE = "{% for message in messages %}{{'<|im_start|>' + message
 def get_requirements_from_task(task: str, spcs_only: bool = False) -> list[model_env.ModelDependency]:
     # Text
     if task in [
-        "conversational",
         "fill-mask",
         "ner",
         "token-classification",
@@ -521,6 +520,7 @@ class HuggingFacePipelineHandler(
                     input_data = X[signature.inputs[0].name].to_list()
                     temp_res = getattr(raw_model, target_method)(input_data)
                 else:
+                    # TODO: remove conversational pipeline code
                     # For others, we could offer the whole dataframe as a list.
                     # Some of them may need some conversion
                     if hasattr(transformers, "ConversationalPipeline") and isinstance(
@@ -759,11 +759,13 @@ class HuggingFaceOpenAICompatibleModel:
             eos_token_id=self.tokenizer.eos_token_id,
             stop_strings=stop_strings,
             stream=stream,
-            repetition_penalty=frequency_penalty,
-            diversity_penalty=presence_penalty if n > 1 else None,
             num_return_sequences=n,
-            num_beams=max(
-
+            num_beams=max(1, n),  # must be >1
+            repetition_penalty=frequency_penalty,
+            # TODO: Handle diversity_penalty and num_beam_groups
+            # not all models support them making it hard to support any huggingface model
+            # diversity_penalty=presence_penalty if n > 1 else None,
+            # num_beam_groups=max(2, n) if presence_penalty else 1,
             do_sample=False,
         )

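The reshuffled `generate` kwargs above keep the beam count in step with the requested number of completions: with `do_sample=False`, transformers rejects `num_return_sequences` larger than `num_beams`, so `num_beams=max(1, n)` always provides enough beams. A small illustrative sketch of that constraint (the tiny model id is a placeholder chosen only to keep the example cheap to run):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")  # placeholder tiny model
model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")

n = 3  # number of completions requested
inputs = tok("Hello", return_tensors="pt")
out = model.generate(
    **inputs,
    do_sample=False,
    num_return_sequences=n,
    num_beams=max(1, n),  # beam search needs num_beams >= num_return_sequences
    max_new_tokens=8,
)
print(out.shape[0])  # n sequences come back
```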
@@ -1,9 +1,8 @@
+import logging
 import os
 from types import ModuleType
 from typing import Optional

-from absl import logging
-
 from snowflake.ml._internal.exceptions import (
     error_codes,
     exceptions as snowml_exceptions,
@@ -12,6 +11,8 @@ from snowflake.ml.model import custom_model, model_signature, type_hints as mode
 from snowflake.ml.model._packager import model_handler
 from snowflake.ml.model._packager.model_meta import model_meta

+logger = logging.getLogger(__name__)
+

 class ModelPackager:
     """Top-level class to save/load and manage a Snowflake Native formatted model.
@@ -96,7 +97,7 @@ class ModelPackager:
                 **options,
             )
             if signatures is None:
-
+                logger.info(f"Model signatures are auto inferred as:\n\n{meta.signatures}")

             self.model = model
             self.meta = meta
@@ -110,27 +110,6 @@ def huggingface_pipeline_signature_auto_infer(
 ) -> Optional[core.ModelSignature]:
     # Text

-    # https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.ConversationalPipeline
-    # Needs to convert to conversation object.
-    if task == "conversational":
-        warnings.warn(
-            (
-                "Conversational pipeline is removed from transformers since 4.42.0. "
-                "Support will be removed from snowflake-ml-python soon."
-            ),
-            category=DeprecationWarning,
-            stacklevel=1,
-        )
-        return core.ModelSignature(
-            inputs=[
-                core.FeatureSpec(name="user_inputs", dtype=core.DataType.STRING, shape=(-1,)),
-                core.FeatureSpec(name="generated_responses", dtype=core.DataType.STRING, shape=(-1,)),
-            ],
-            outputs=[
-                core.FeatureSpec(name="generated_responses", dtype=core.DataType.STRING, shape=(-1,)),
-            ],
-        )
-
     # https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TokenClassificationPipeline
     if task == "fill-mask":
         return core.ModelSignature(