snowflake-ml-python 1.13.0__py3-none-any.whl → 1.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. snowflake/ml/_internal/platform_capabilities.py +9 -7
  2. snowflake/ml/_internal/utils/connection_params.py +5 -3
  3. snowflake/ml/_internal/utils/jwt_generator.py +3 -2
  4. snowflake/ml/_internal/utils/temp_file_utils.py +1 -2
  5. snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +16 -3
  6. snowflake/ml/experiment/_entities/__init__.py +2 -1
  7. snowflake/ml/experiment/_entities/run.py +0 -15
  8. snowflake/ml/experiment/_entities/run_metadata.py +3 -51
  9. snowflake/ml/experiment/experiment_tracking.py +8 -8
  10. snowflake/ml/jobs/_utils/constants.py +1 -1
  11. snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +9 -7
  12. snowflake/ml/jobs/job.py +12 -4
  13. snowflake/ml/jobs/manager.py +34 -7
  14. snowflake/ml/lineage/lineage_node.py +0 -1
  15. snowflake/ml/model/__init__.py +2 -6
  16. snowflake/ml/model/_client/model/batch_inference_specs.py +0 -4
  17. snowflake/ml/model/_client/model/inference_engine_utils.py +55 -0
  18. snowflake/ml/model/_client/model/model_version_impl.py +25 -77
  19. snowflake/ml/model/_client/ops/model_ops.py +9 -2
  20. snowflake/ml/model/_client/ops/service_ops.py +82 -36
  21. snowflake/ml/model/_client/sql/service.py +29 -5
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +4 -2
  23. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +7 -5
  24. snowflake/ml/model/_packager/model_packager.py +4 -3
  25. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +0 -1
  26. snowflake/ml/model/_signatures/utils.py +0 -21
  27. snowflake/ml/model/models/huggingface_pipeline.py +56 -21
  28. snowflake/ml/monitoring/_client/model_monitor_sql_client.py +47 -3
  29. snowflake/ml/monitoring/_manager/model_monitor_manager.py +3 -0
  30. snowflake/ml/monitoring/entities/model_monitor_config.py +3 -0
  31. snowflake/ml/monitoring/model_monitor.py +30 -0
  32. snowflake/ml/registry/_manager/model_manager.py +1 -1
  33. snowflake/ml/registry/_manager/model_parameter_reconciler.py +2 -2
  34. snowflake/ml/utils/connection_params.py +5 -3
  35. snowflake/ml/version.py +1 -1
  36. {snowflake_ml_python-1.13.0.dist-info → snowflake_ml_python-1.15.0.dist-info}/METADATA +51 -34
  37. {snowflake_ml_python-1.13.0.dist-info → snowflake_ml_python-1.15.0.dist-info}/RECORD +40 -39
  38. {snowflake_ml_python-1.13.0.dist-info → snowflake_ml_python-1.15.0.dist-info}/WHEEL +0 -0
  39. {snowflake_ml_python-1.13.0.dist-info → snowflake_ml_python-1.15.0.dist-info}/licenses/LICENSE.txt +0 -0
  40. {snowflake_ml_python-1.13.0.dist-info → snowflake_ml_python-1.15.0.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,10 @@ from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.utils import sql_identifier
 from snowflake.ml.lineage import lineage_node
 from snowflake.ml.model import task, type_hints
-from snowflake.ml.model._client.model import batch_inference_specs
+from snowflake.ml.model._client.model import (
+    batch_inference_specs,
+    inference_engine_utils,
+)
 from snowflake.ml.model._client.ops import metadata_ops, model_ops, service_ops
 from snowflake.ml.model._model_composer import model_composer
 from snowflake.ml.model._model_composer.model_manifest import model_manifest_schema
@@ -22,6 +25,7 @@ from snowflake.snowpark import Session, async_job, dataframe
 _TELEMETRY_PROJECT = "MLOps"
 _TELEMETRY_SUBPROJECT = "ModelManagement"
 _BATCH_INFERENCE_JOB_ID_PREFIX = "BATCH_INFERENCE_"
+_BATCH_INFERENCE_TEMPORARY_FOLDER = "_temporary"


 class ExportMode(enum.Enum):
@@ -553,7 +557,7 @@ class ModelVersion(lineage_node.LineageNode):
         self,
         *,
         compute_pool: str,
-        input_spec: batch_inference_specs.InputSpec,
+        input_spec: dataframe.DataFrame,
         output_spec: batch_inference_specs.OutputSpec,
         job_spec: Optional[batch_inference_specs.JobSpec] = None,
     ) -> jobs.MLJob[Any]:
@@ -569,6 +573,18 @@ class ModelVersion(lineage_node.LineageNode):
         if warehouse is None:
             raise ValueError("Warehouse is not set. Please set the warehouse field in the JobSpec.")

+        # use a temporary folder in the output stage to store the intermediate output from the dataframe
+        output_stage_location = output_spec.stage_location
+        if not output_stage_location.endswith("/"):
+            output_stage_location += "/"
+        input_stage_location = f"{output_stage_location}{_BATCH_INFERENCE_TEMPORARY_FOLDER}/"
+
+        try:
+            input_spec.write.copy_into_location(location=input_stage_location, file_format_type="parquet", header=True)
+        # todo: be specific about the type of errors to provide better error messages.
+        except Exception as e:
+            raise RuntimeError(f"Failed to process input_spec: {e}")
+
         if job_spec.job_name is None:
             # Same as the MLJob ID generation logic with a different prefix
             job_name = f"{_BATCH_INFERENCE_JOB_ID_PREFIX}{str(uuid.uuid4()).replace('-', '_').upper()}"
@@ -592,9 +608,9 @@ class ModelVersion(lineage_node.LineageNode):
             job_name=job_name,
             replicas=job_spec.replicas,
             # input and output
-            input_stage_location=input_spec.stage_location,
+            input_stage_location=input_stage_location,
             input_file_pattern="*",
-            output_stage_location=output_spec.stage_location,
+            output_stage_location=output_stage_location,
             completion_filename="_SUCCESS",
             # misc
             statement_params=statement_params,
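The hunks above change the batch inference entry point on ModelVersion: input_spec is now a Snowpark DataFrame rather than a batch_inference_specs.InputSpec, and the client materializes it as Parquet under a `_temporary` folder inside the output stage before launching the job. A minimal usage sketch, assuming the method shown is ModelVersion.run_batch_inference (its name is not visible in this hunk) and that OutputSpec/JobSpec accept the fields the hunk references (stage_location, warehouse):

```python
from snowflake.ml.model._client.model import batch_inference_specs
from snowflake.ml.registry import Registry

reg = Registry(session=session)
mv = reg.get_model("MY_MODEL").version("V1")

# Any Snowpark DataFrame can serve as the input now; it is copied to
# <output stage>/_temporary/ as Parquet files before the job starts.
input_df = session.table("MY_DB.MY_SCHEMA.SCORING_INPUT")  # hypothetical source table

job = mv.run_batch_inference(  # assumed method name; returns jobs.MLJob[Any]
    compute_pool="MY_COMPUTE_POOL",
    input_spec=input_df,  # was batch_inference_specs.InputSpec in 1.13.0
    output_spec=batch_inference_specs.OutputSpec(stage_location="@MY_DB.MY_SCHEMA.RESULTS/run1/"),
    job_spec=batch_inference_specs.JobSpec(warehouse="MY_WH"),
)
# Result files land under the output stage; a _SUCCESS file marks completion.
```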
@@ -768,75 +784,6 @@ class ModelVersion(lineage_node.LineageNode):
             version_name=sql_identifier.SqlIdentifier(version),
         )

-    def _get_inference_engine_args(
-        self, experimental_options: Optional[dict[str, Any]]
-    ) -> Optional[service_ops.InferenceEngineArgs]:
-
-        if not experimental_options:
-            return None
-
-        if "inference_engine" not in experimental_options:
-            raise ValueError("inference_engine is required in experimental_options")
-
-        return service_ops.InferenceEngineArgs(
-            inference_engine=experimental_options["inference_engine"],
-            inference_engine_args_override=experimental_options.get("inference_engine_args_override"),
-        )
-
-    def _enrich_inference_engine_args(
-        self,
-        inference_engine_args: service_ops.InferenceEngineArgs,
-        gpu_requests: Optional[Union[str, int]] = None,
-    ) -> Optional[service_ops.InferenceEngineArgs]:
-        """Enrich inference engine args with model path and tensor parallelism settings.
-
-        Args:
-            inference_engine_args: The original inference engine args
-            gpu_requests: The number of GPUs requested
-
-        Returns:
-            Enriched inference engine args
-
-        Raises:
-            ValueError: Invalid gpu_requests
-        """
-        if inference_engine_args.inference_engine_args_override is None:
-            inference_engine_args.inference_engine_args_override = []
-
-        # Get model stage path and strip off "snow://" prefix
-        model_stage_path = self._model_ops.get_model_version_stage_path(
-            database_name=None,
-            schema_name=None,
-            model_name=self._model_name,
-            version_name=self._version_name,
-        )
-
-        # Strip "snow://" prefix
-        if model_stage_path.startswith("snow://"):
-            model_stage_path = model_stage_path.replace("snow://", "", 1)
-
-        # Always overwrite the model key by appending
-        inference_engine_args.inference_engine_args_override.append(f"--model={model_stage_path}")
-
-        gpu_count = None
-
-        # Set tensor-parallelism if gpu_requests is specified
-        if gpu_requests is not None:
-            # assert gpu_requests is a string or an integer before casting to int
-            if isinstance(gpu_requests, str) or isinstance(gpu_requests, int):
-                try:
-                    gpu_count = int(gpu_requests)
-                except ValueError:
-                    raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
-
-        if gpu_count is not None:
-            if gpu_count > 0:
-                inference_engine_args.inference_engine_args_override.append(f"--tensor-parallel-size={gpu_count}")
-            else:
-                raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
-
-        return inference_engine_args
-
     def _check_huggingface_text_generation_model(
         self,
         statement_params: Optional[dict[str, Any]] = None,
@@ -1116,13 +1063,14 @@ class ModelVersion(lineage_node.LineageNode):
         if experimental_options:
             self._check_huggingface_text_generation_model(statement_params)

-        inference_engine_args: Optional[service_ops.InferenceEngineArgs] = self._get_inference_engine_args(
-            experimental_options
-        )
+        inference_engine_args = inference_engine_utils._get_inference_engine_args(experimental_options)

         # Enrich inference engine args if inference engine is specified
         if inference_engine_args is not None:
-            inference_engine_args = self._enrich_inference_engine_args(inference_engine_args, gpu_requests)
+            inference_engine_args = inference_engine_utils._enrich_inference_engine_args(
+                inference_engine_args,
+                gpu_requests,
+            )

         from snowflake.ml.model import event_handler
         from snowflake.snowpark import exceptions
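The two private helpers removed above now live in the new snowflake/ml/model/_client/model/inference_engine_utils.py (file 17 in the list), and the call site switches to that module. A standalone sketch of what the enrichment step does, mirroring the deleted _enrich_inference_engine_args rather than the real helper (which operates on service_ops.InferenceEngineArgs): the model's stage path loses its snow:// scheme and is appended as --model=..., and a positive integer gpu_requests becomes --tensor-parallel-size=... (flags in the style of a vLLM-like engine, though the diff does not name one):

```python
from typing import Optional, Union


def enrich_engine_args(
    args_override: Optional[list[str]],
    model_stage_path: str,
    gpu_requests: Optional[Union[str, int]] = None,
) -> list[str]:
    """Illustrative re-implementation of the enrichment behaviour shown in the removed method."""
    args = list(args_override or [])
    # The engine reads the model from a stage path without the snow:// scheme.
    if model_stage_path.startswith("snow://"):
        model_stage_path = model_stage_path.replace("snow://", "", 1)
    args.append(f"--model={model_stage_path}")
    if gpu_requests is not None:
        try:
            gpu_count = int(gpu_requests)
        except ValueError:
            raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
        if gpu_count <= 0:
            raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
        # One tensor-parallel shard per requested GPU.
        args.append(f"--tensor-parallel-size={gpu_count}")
    return args


assert enrich_engine_args(None, "snow://model/MY_MODEL/versions/V1", gpu_requests="4") == [
    "--model=model/MY_MODEL/versions/V1",
    "--tensor-parallel-size=4",
]
```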
@@ -47,7 +47,8 @@ class ServiceInfo(TypedDict):
 class ModelOperator:
     INFERENCE_SERVICE_ENDPOINT_NAME = "inference"
     INGRESS_ENDPOINT_URL_SUFFIX = "snowflakecomputing.app"
-    PRIVATELINK_INGRESS_ENDPOINT_URL_SUBSTRING = "privatelink.snowflakecomputing"
+    # app-service-privatelink might not contain "snowflakecomputing" in the url - using the minimum required substring
+    PRIVATELINK_INGRESS_ENDPOINT_URL_SUBSTRING = "privatelink.snowflake"

     def __init__(
         self,
@@ -631,7 +632,13 @@ class ModelOperator:

     def _extract_and_validate_privatelink_url(self, res_row: "row.Row") -> Optional[str]:
         """Extract and validate privatelink ingress URL from endpoint row."""
-        url_value = res_row[self._service_client.MODEL_INFERENCE_SERVICE_ENDPOINT_PRIVATELINK_INGRESS_URL_COL_NAME]
+        # Check if the privatelink_ingress_url column exists
+        col_name = self._service_client.MODEL_INFERENCE_SERVICE_ENDPOINT_PRIVATELINK_INGRESS_URL_COL_NAME
+        if col_name not in res_row:
+            # Column doesn't exist in query result for non-Business Critical accounts
+            return None
+
+        url_value = res_row[col_name]
         if url_value is None:
             return None
         url_str = str(url_value)
@@ -155,7 +155,8 @@ class ServiceOperator:
             database_name=database_name,
             schema_name=schema_name,
         )
-        if pc.PlatformCapabilities.get_instance().is_inlined_deployment_spec_enabled():
+        self._use_inlined_deployment_spec = pc.PlatformCapabilities.get_instance().is_inlined_deployment_spec_enabled()
+        if self._use_inlined_deployment_spec:
             self._workspace = None
             self._model_deployment_spec = model_deployment_spec.ModelDeploymentSpec()
         else:
@@ -264,7 +265,14 @@ class ServiceOperator:
            self._model_deployment_spec.add_hf_logger_spec(
                hf_model_name=hf_model_args.hf_model_name,
                hf_task=hf_model_args.hf_task,
-               hf_token=hf_model_args.hf_token,
+               hf_token=(
+                   # when using inlined deployment spec, we need to use QMARK_RESERVED_TOKEN
+                   # to avoid revealing the token while calling the SYSTEM$DEPLOY_MODEL function
+                   # noop if using file-based deployment spec or token is not provided
+                   service_sql.QMARK_RESERVED_TOKEN
+                   if hf_model_args.hf_token and self._use_inlined_deployment_spec
+                   else hf_model_args.hf_token
+               ),
                hf_tokenizer=hf_model_args.hf_tokenizer,
                hf_revision=hf_model_args.hf_revision,
                hf_trust_remote_code=hf_model_args.hf_trust_remote_code,
@@ -320,20 +328,31 @@ class ServiceOperator:
                model_deployment_spec.ModelDeploymentSpec.DEPLOY_SPEC_FILE_REL_PATH if self._workspace else None
            ),
            model_deployment_spec_yaml_str=None if self._workspace else spec_yaml_str_or_path,
+           query_params=(
+               # when using inlined deployment spec, we need to add the token to the query params
+               # to avoid revealing the token while calling the SYSTEM$DEPLOY_MODEL function
+               # noop if using file-based deployment spec or token is not provided
+               [hf_model_args.hf_token]
+               if (self._use_inlined_deployment_spec and hf_model_args and hf_model_args.hf_token)
+               else []
+           ),
            statement_params=statement_params,
        )

-       # stream service logs in a thread
-       model_build_service_name = sql_identifier.SqlIdentifier(
-           self._get_service_id_from_deployment_step(query_id, DeploymentStep.MODEL_BUILD)
-       )
-       model_build_service = ServiceLogInfo(
-           database_name=service_database_name,
-           schema_name=service_schema_name,
-           service_name=model_build_service_name,
-           deployment_step=DeploymentStep.MODEL_BUILD,
-           log_color=service_logger.LogColor.GREEN,
-       )
+       model_build_service: Optional[ServiceLogInfo] = None
+       if is_enable_image_build:
+           # stream service logs in a thread
+           model_build_service_name = sql_identifier.SqlIdentifier(
+               self._get_service_id_from_deployment_step(query_id, DeploymentStep.MODEL_BUILD)
+           )
+           model_build_service = ServiceLogInfo(
+               database_name=service_database_name,
+               schema_name=service_schema_name,
+               service_name=model_build_service_name,
+               deployment_step=DeploymentStep.MODEL_BUILD,
+               log_color=service_logger.LogColor.GREEN,
+           )

        model_inference_service = ServiceLogInfo(
            database_name=service_database_name,
            schema_name=service_schema_name,
@@ -375,7 +394,7 @@ class ServiceOperator:
            progress_status.increment()

            # Poll for model build to start if not using existing service
-           if not model_inference_service_exists:
+           if not model_inference_service_exists and model_build_service:
                self._wait_for_service_status(
                    model_build_service_name,
                    service_sql.ServiceStatus.RUNNING,
@@ -390,7 +409,7 @@ class ServiceOperator:
            progress_status.increment()

            # Poll for model build completion
-           if not model_inference_service_exists:
+           if not model_inference_service_exists and model_build_service:
                self._wait_for_service_status(
                    model_build_service_name,
                    service_sql.ServiceStatus.DONE,
@@ -454,7 +473,7 @@ class ServiceOperator:
        self,
        async_job: snowpark.AsyncJob,
        model_logger_service: Optional[ServiceLogInfo],
-       model_build_service: ServiceLogInfo,
+       model_build_service: Optional[ServiceLogInfo],
        model_inference_service: ServiceLogInfo,
        model_inference_service_exists: bool,
        force_rebuild: bool,
@@ -483,7 +502,7 @@ class ServiceOperator:
        self,
        force_rebuild: bool,
        service_log_meta: ServiceLogMetadata,
-       model_build_service: ServiceLogInfo,
+       model_build_service: Optional[ServiceLogInfo],
        model_inference_service: ServiceLogInfo,
        operation_id: str,
        statement_params: Optional[dict[str, Any]] = None,
@@ -599,13 +618,24 @@ class ServiceOperator:
            # check if model logger service is done
            # and transition the service log metadata to the model image build service
            if service.deployment_step == DeploymentStep.MODEL_LOGGING:
-               service_log_meta.transition_service_log_metadata(
-                   model_build_service,
-                   f"Model Logger service {service.display_service_name} complete.",
-                   is_model_build_service_done=False,
-                   is_model_logger_service_done=service_log_meta.is_model_logger_service_done,
-                   operation_id=operation_id,
-               )
+               if model_build_service:
+                   # building the inference image, transition to the model build service
+                   service_log_meta.transition_service_log_metadata(
+                       model_build_service,
+                       f"Model Logger service {service.display_service_name} complete.",
+                       is_model_build_service_done=False,
+                       is_model_logger_service_done=service_log_meta.is_model_logger_service_done,
+                       operation_id=operation_id,
+                   )
+               else:
+                   # no model build service, transition to the model inference service
+                   service_log_meta.transition_service_log_metadata(
+                       model_inference_service,
+                       f"Model Logger service {service.display_service_name} complete.",
+                       is_model_build_service_done=True,
+                       is_model_logger_service_done=service_log_meta.is_model_logger_service_done,
+                       operation_id=operation_id,
+                   )
            # check if model build service is done
            # and transition the service log metadata to the model inference service
            elif service.deployment_step == DeploymentStep.MODEL_BUILD:
@@ -616,6 +646,8 @@ class ServiceOperator:
                    is_model_logger_service_done=service_log_meta.is_model_logger_service_done,
                    operation_id=operation_id,
                )
+           elif service.deployment_step == DeploymentStep.MODEL_INFERENCE:
+               module_logger.info(f"Inference service {service.display_service_name} is deployed.")
            else:
                module_logger.warning(f"Service {service.display_service_name} is done, but not transitioning.")

@@ -623,7 +655,7 @@ class ServiceOperator:
        self,
        async_job: snowpark.AsyncJob,
        model_logger_service: Optional[ServiceLogInfo],
-       model_build_service: ServiceLogInfo,
+       model_build_service: Optional[ServiceLogInfo],
        model_inference_service: ServiceLogInfo,
        model_inference_service_exists: bool,
        force_rebuild: bool,
@@ -632,14 +664,23 @@ class ServiceOperator:
    ) -> None:
        """Stream service logs while the async job is running."""

-       model_build_service_logger = service_logger.get_logger(  # BuildJobName
-           model_build_service.display_service_name,
-           model_build_service.log_color,
-           operation_id=operation_id,
-       )
-       if model_logger_service:
-           model_logger_service_logger = service_logger.get_logger(  # ModelLoggerName
-               model_logger_service.display_service_name,
+       if model_build_service:
+           model_build_service_logger = service_logger.get_logger(
+               model_build_service.display_service_name,  # BuildJobName
+               model_build_service.log_color,
+               operation_id=operation_id,
+           )
+           service_log_meta = ServiceLogMetadata(
+               service_logger=model_build_service_logger,
+               service=model_build_service,
+               service_status=None,
+               is_model_build_service_done=False,
+               is_model_logger_service_done=True,
+               log_offset=0,
+           )
+       elif model_logger_service:
+           model_logger_service_logger = service_logger.get_logger(
+               model_logger_service.display_service_name,  # ModelLoggerName
                model_logger_service.log_color,
                operation_id=operation_id,
            )
@@ -653,12 +694,17 @@ class ServiceOperator:
                log_offset=0,
            )
        else:
+           model_inference_service_logger = service_logger.get_logger(
+               model_inference_service.display_service_name,  # ModelInferenceName
+               model_inference_service.log_color,
+               operation_id=operation_id,
+           )
            service_log_meta = ServiceLogMetadata(
-               service_logger=model_build_service_logger,
-               service=model_build_service,
+               service_logger=model_inference_service_logger,
+               service=model_inference_service,
                service_status=None,
                is_model_build_service_done=False,
-               is_model_logger_service_done=True,
+               is_model_logger_service_done=False,
                log_offset=0,
            )
@@ -1,8 +1,9 @@
+import contextlib
 import dataclasses
 import enum
 import logging
 import textwrap
-from typing import Any, Optional
+from typing import Any, Generator, Optional

 from snowflake import snowpark
 from snowflake.ml._internal.utils import (
@@ -17,6 +18,11 @@ from snowflake.snowpark._internal import utils as snowpark_utils

 logger = logging.getLogger(__name__)

+# Using this token instead of '?' to avoid escaping issues
+# After quotes are escaped, we replace this token with '|| ? ||'
+QMARK_RESERVED_TOKEN = "<QMARK_RESERVED_TOKEN>"
+QMARK_PARAMETER_TOKEN = "'|| ? ||'"
+

 class ServiceStatus(enum.Enum):
     PENDING = "PENDING"
@@ -70,12 +76,26 @@ class ServiceSQLClient(_base._BaseSQLClient):
     CONTAINER_STATUS = "status"
     MESSAGE = "message"

+    @contextlib.contextmanager
+    def _qmark_paramstyle(self) -> Generator[None, None, None]:
+        """Context manager that temporarily changes paramstyle to qmark and restores original value on exit."""
+        if not hasattr(self._session, "_options"):
+            yield
+        else:
+            original_paramstyle = self._session._options["paramstyle"]
+            try:
+                self._session._options["paramstyle"] = "qmark"
+                yield
+            finally:
+                self._session._options["paramstyle"] = original_paramstyle
+
     def deploy_model(
         self,
         *,
         stage_path: Optional[str] = None,
         model_deployment_spec_yaml_str: Optional[str] = None,
         model_deployment_spec_file_rel_path: Optional[str] = None,
+        query_params: Optional[list[Any]] = None,
         statement_params: Optional[dict[str, Any]] = None,
     ) -> tuple[str, snowpark.AsyncJob]:
         assert model_deployment_spec_yaml_str or model_deployment_spec_file_rel_path
@@ -83,11 +103,18 @@ class ServiceSQLClient(_base._BaseSQLClient):
             model_deployment_spec_yaml_str = snowpark_utils.escape_single_quotes(
                 model_deployment_spec_yaml_str
             )  # type: ignore[no-untyped-call]
+            model_deployment_spec_yaml_str = model_deployment_spec_yaml_str.replace(  # type: ignore[union-attr]
+                QMARK_RESERVED_TOKEN, QMARK_PARAMETER_TOKEN
+            )
             logger.info(f"Deploying model with spec={model_deployment_spec_yaml_str}")
             sql_str = f"CALL SYSTEM$DEPLOY_MODEL('{model_deployment_spec_yaml_str}')"
         else:
             sql_str = f"CALL SYSTEM$DEPLOY_MODEL('@{stage_path}/{model_deployment_spec_file_rel_path}')"
-        async_job = self._session.sql(sql_str).collect(block=False, statement_params=statement_params)
+        with self._qmark_paramstyle():
+            async_job = self._session.sql(
+                sql_str,
+                params=query_params if query_params else None,
+            ).collect(block=False, statement_params=statement_params)
         assert isinstance(async_job, snowpark.AsyncJob)
         return async_job.query_id, async_job
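Together with the hf_token and query_params changes in the ServiceOperator hunks above, the QMARK tokens keep the Hugging Face token out of the inlined SYSTEM$DEPLOY_MODEL text: the deployment spec carries a placeholder, deploy_model rewrites it into a string concatenation around a ? bind marker, and the real secret only travels through qmark-style query parameters. A small sketch of the string mechanics (values made up):

```python
QMARK_RESERVED_TOKEN = "<QMARK_RESERVED_TOKEN>"
QMARK_PARAMETER_TOKEN = "'|| ? ||'"

# What the spec contains after add_hf_logger_spec substitutes the placeholder.
spec_yaml = f"hf_token: {QMARK_RESERVED_TOKEN}"

# What deploy_model sends: the placeholder becomes a concatenation with a ? marker.
sql_str = f"CALL SYSTEM$DEPLOY_MODEL('{spec_yaml.replace(QMARK_RESERVED_TOKEN, QMARK_PARAMETER_TOKEN)}')"
print(sql_str)
# CALL SYSTEM$DEPLOY_MODEL('hf_token: '|| ? ||'')

# The actual token is bound separately, so it never appears in the SQL text:
# session.sql(sql_str, params=["hf_abc123"]).collect(block=False)  # requires qmark paramstyle
```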
@@ -256,9 +283,6 @@ class ServiceSQLClient(_base._BaseSQLClient):
            )
            .has_column(ServiceSQLClient.MODEL_INFERENCE_SERVICE_ENDPOINT_NAME_COL_NAME, allow_empty=True)
            .has_column(ServiceSQLClient.MODEL_INFERENCE_SERVICE_ENDPOINT_INGRESS_URL_COL_NAME, allow_empty=True)
-           .has_column(
-               ServiceSQLClient.MODEL_INFERENCE_SERVICE_ENDPOINT_PRIVATELINK_INGRESS_URL_COL_NAME, allow_empty=True
-           )
        )

        return res.validate()
@@ -1,5 +1,6 @@
 import importlib
 import json
+import logging
 import os
 import pathlib
 import warnings
@@ -8,7 +9,6 @@ from typing import Any, Callable, Iterable, Optional, Sequence, cast
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
-from absl import logging

 import snowflake.snowpark.dataframe as sp_df
 from snowflake.ml._internal import env
@@ -23,6 +23,8 @@ from snowflake.ml.model._signatures import (
 )
 from snowflake.snowpark import DataFrame as SnowparkDataFrame

+logger = logging.getLogger(__name__)
+
 EXPLAIN_BACKGROUND_DATA_ROWS_COUNT_LIMIT = 1000

@@ -257,7 +259,7 @@ def validate_model_task(passed_model_task: model_types.Task, inferred_model_task
        )
        return inferred_model_task
    elif inferred_model_task != model_types.Task.UNKNOWN:
-       logging.info(f"Inferred Task: {inferred_model_task.name} is used as task for this model " f"version")
+       logger.info(f"Inferred Task: {inferred_model_task.name} is used as task for this model " f"version")
        return inferred_model_task
    return passed_model_task
@@ -43,7 +43,6 @@ DEFAULT_CHAT_TEMPLATE = "{% for message in messages %}{{'<|im_start|>' + message
 def get_requirements_from_task(task: str, spcs_only: bool = False) -> list[model_env.ModelDependency]:
     # Text
     if task in [
-        "conversational",
         "fill-mask",
         "ner",
         "token-classification",
@@ -521,6 +520,7 @@ class HuggingFacePipelineHandler(
                input_data = X[signature.inputs[0].name].to_list()
                temp_res = getattr(raw_model, target_method)(input_data)
            else:
+               # TODO: remove conversational pipeline code
                # For others, we could offer the whole dataframe as a list.
                # Some of them may need some conversion
                if hasattr(transformers, "ConversationalPipeline") and isinstance(
@@ -759,11 +759,13 @@ class HuggingFaceOpenAICompatibleModel:
            eos_token_id=self.tokenizer.eos_token_id,
            stop_strings=stop_strings,
            stream=stream,
-           repetition_penalty=frequency_penalty,
-           diversity_penalty=presence_penalty if n > 1 else None,
            num_return_sequences=n,
-           num_beams=max(2, n),  # must be >1
-           num_beam_groups=max(2, n) if presence_penalty else 1,
+           num_beams=max(1, n),  # must be >1
+           repetition_penalty=frequency_penalty,
+           # TODO: Handle diversity_penalty and num_beam_groups
+           # not all models support them making it hard to support any huggingface model
+           # diversity_penalty=presence_penalty if n > 1 else None,
+           # num_beam_groups=max(2, n) if presence_penalty else 1,
            do_sample=False,
        )
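On the generation-argument change just above (apparently the kwargs handed to a transformers text-generation call): max(1, n) keeps beam search legal when several completions are requested while no longer forcing a two-beam search for a single one, and diversity_penalty/num_beam_groups are commented out because not every model supports them, per the TODO. A sketch of the invariant, with n standing for the requested number of completions:

```python
def generation_kwargs(n: int, frequency_penalty: float) -> dict:
    """Hypothetical helper mirroring the kwargs shown in the hunk above."""
    return {
        "num_return_sequences": n,
        "num_beams": max(1, n),  # with do_sample=False, num_return_sequences must not exceed num_beams
        "repetition_penalty": frequency_penalty,
        "do_sample": False,
    }


assert generation_kwargs(1, 1.0)["num_beams"] == 1  # plain greedy decoding for a single completion
assert generation_kwargs(3, 1.0)["num_beams"] == 3  # still >= num_return_sequences when n > 1
```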
@@ -1,9 +1,8 @@
+import logging
 import os
 from types import ModuleType
 from typing import Optional

-from absl import logging
-
 from snowflake.ml._internal.exceptions import (
     error_codes,
     exceptions as snowml_exceptions,
@@ -12,6 +11,8 @@ from snowflake.ml.model import custom_model, model_signature, type_hints as mode
 from snowflake.ml.model._packager import model_handler
 from snowflake.ml.model._packager.model_meta import model_meta

+logger = logging.getLogger(__name__)
+

 class ModelPackager:
     """Top-level class to save/load and manage a Snowflake Native formatted model.
@@ -96,7 +97,7 @@ class ModelPackager:
                **options,
            )
            if signatures is None:
-               logging.info(f"Model signatures are auto inferred as:\n\n{meta.signatures}")
+               logger.info(f"Model signatures are auto inferred as:\n\n{meta.signatures}")
            self.model = model
            self.meta = meta
@@ -2,7 +2,6 @@
 # Generate by running 'bazel run --config=pre_build //bazel/requirements:sync_requirements'

 REQUIREMENTS = [
-    "absl-py>=0.15,<2",
     "aiohttp!=4.0.0a0, !=4.0.0a1",
     "anyio>=3.5.0,<5",
     "cachetools>=3.1.1,<6",
@@ -110,27 +110,6 @@ def huggingface_pipeline_signature_auto_infer(
 ) -> Optional[core.ModelSignature]:
     # Text

-    # https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.ConversationalPipeline
-    # Needs to convert to conversation object.
-    if task == "conversational":
-        warnings.warn(
-            (
-                "Conversational pipeline is removed from transformers since 4.42.0. "
-                "Support will be removed from snowflake-ml-python soon."
-            ),
-            category=DeprecationWarning,
-            stacklevel=1,
-        )
-        return core.ModelSignature(
-            inputs=[
-                core.FeatureSpec(name="user_inputs", dtype=core.DataType.STRING, shape=(-1,)),
-                core.FeatureSpec(name="generated_responses", dtype=core.DataType.STRING, shape=(-1,)),
-            ],
-            outputs=[
-                core.FeatureSpec(name="generated_responses", dtype=core.DataType.STRING, shape=(-1,)),
-            ],
-        )
-
     # https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TokenClassificationPipeline
     if task == "fill-mask":
         return core.ModelSignature(