snowflake-ml-python 1.9.2__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,15 @@ from typing import Optional
 
 import platformdirs
 
+# Module-level logger for operational messages that should appear on console
+stdout_handler = logging.StreamHandler(sys.stdout)
+stdout_handler.setFormatter(logging.Formatter("%(message)s"))
+
+console_logger = logging.getLogger(__name__)
+console_logger.addHandler(stdout_handler)
+console_logger.setLevel(logging.INFO)
+console_logger.propagate = False
+
 
 class LogColor(enum.Enum):
     GREY = "\x1b[38;20m"
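Note: the new module-level `console_logger` is the standard dedicated-console-logger pattern — its own stdout handler plus `propagate = False` keeps these operational messages visible regardless of how the root logger is configured, and keeps them out of the root handlers. A minimal standalone sketch of the pattern, with illustrative names (not from the package):

    import logging
    import sys

    # Dedicated console logger: emits to stdout even if the root logger is silenced.
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(logging.Formatter("%(message)s"))

    console_logger = logging.getLogger("demo.console")  # illustrative name
    console_logger.addHandler(stdout_handler)
    console_logger.setLevel(logging.INFO)
    console_logger.propagate = False  # keep messages out of root handlers

    logging.getLogger().setLevel(logging.ERROR)  # root logger silenced...
    console_logger.info("still visible on stdout")  # ...but this still prints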
@@ -109,42 +118,36 @@ def _get_or_create_parent_logger(operation_id: str) -> logging.Logger:
     """Get or create a parent logger with FileHandler for the operation."""
     parent_logger_name = f"snowflake_ml_operation_{operation_id}"
     parent_logger = logging.getLogger(parent_logger_name)
+    parent_logger.setLevel(logging.DEBUG)
+    parent_logger.propagate = False
 
-    # Only add handler if it doesn't exist yet
     if not parent_logger.handlers:
         log_file_path = _get_log_file_path(operation_id)
 
         if log_file_path:
-            # Successfully found a writable location
            try:
                 file_handler = logging.FileHandler(log_file_path)
                 file_handler.setFormatter(logging.Formatter("%(name)s [%(asctime)s] [%(levelname)s] %(message)s"))
                 parent_logger.addHandler(file_handler)
-                parent_logger.setLevel(logging.DEBUG)
-                parent_logger.propagate = False  # Don't propagate to root logger
 
-                # Log the file location
-                parent_logger.warning(f"Operation logs saved to: {log_file_path}")
+                console_logger.info(f"create_service logs saved to: {log_file_path}")
             except OSError as e:
-                # Even though we found a path, file creation failed
-                # Fall back to console-only logging
-                parent_logger.setLevel(logging.DEBUG)
-                parent_logger.propagate = False
-                parent_logger.warning(f"Could not create log file at {log_file_path}: {e}. Using console-only logging.")
+                console_logger.warning(f"Could not create log file at {log_file_path}: {e}.")
         else:
             # No writable location found, use console-only logging
-            parent_logger.setLevel(logging.DEBUG)
-            parent_logger.propagate = False
-            parent_logger.warning("Filesystem appears to be readonly. Using console-only logging.")
+            console_logger.warning("No writable location found for create_service log file.")
+
+        if logging.getLogger().level > logging.INFO:
+            console_logger.info(
+                "To see logs in console, set log level to INFO: logging.getLogger().setLevel(logging.INFO)"
+            )
 
     return parent_logger
 
 
 def get_logger(logger_name: str, info_color: LogColor, operation_id: Optional[str] = None) -> logging.Logger:
     logger = logging.getLogger(logger_name)
-    handler = logging.StreamHandler(sys.stdout)
-    handler.setFormatter(CustomFormatter(info_color))
-    logger.addHandler(handler)
+    root_logger = logging.getLogger()
 
     # If operation_id provided, set up parent logger with file handler
     if operation_id:
@@ -152,6 +155,17 @@ def get_logger(logger_name: str, info_color: LogColor, operation_id: Optional[st
         logger.parent = parent_logger
         logger.propagate = True
 
+        if root_logger.level <= logging.INFO:
+            handler = logging.StreamHandler(sys.stdout)
+            handler.setFormatter(CustomFormatter(info_color))
+            logger.addHandler(handler)
+    else:
+        # No operation_id - add console handler only if user wants verbose logging
+        if root_logger.level <= logging.INFO and not logger.handlers:
+            handler = logging.StreamHandler(sys.stdout)
+            handler.setFormatter(CustomFormatter(info_color))
+            logger.addHandler(handler)
+
     return logger
 
 
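Net effect of the two hunks above: detailed logs always go to the per-operation log file, while console handlers are attached only when the root logger is at INFO or below. A caller opts in exactly as the printed hint suggests:

    import logging

    # Opt in to verbose console output before starting the operation;
    # file logging happens regardless of this level.
    logging.getLogger().setLevel(logging.INFO)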
@@ -0,0 +1,55 @@
+from typing import TYPE_CHECKING, Optional
+from warnings import warn
+
+import lightgbm as lgb
+
+if TYPE_CHECKING:
+    from snowflake.ml.experiment.experiment_tracking import ExperimentTracking
+    from snowflake.ml.model.model_signature import ModelSignature
+
+
+class SnowflakeLightgbmCallback(lgb.callback._RecordEvaluationCallback):
+    def __init__(
+        self,
+        experiment_tracking: "ExperimentTracking",
+        log_model: bool = True,
+        log_metrics: bool = True,
+        log_params: bool = True,
+        model_name: Optional[str] = None,
+        model_signature: Optional["ModelSignature"] = None,
+    ) -> None:
+        self._experiment_tracking = experiment_tracking
+        self.log_model = log_model
+        self.log_metrics = log_metrics
+        self.log_params = log_params
+        self.model_name = model_name
+        self.model_signature = model_signature
+
+        super().__init__(eval_result={})
+
+    def __call__(self, env: lgb.callback.CallbackEnv) -> None:
+        if self.log_params:
+            if env.iteration == env.begin_iteration:  # Log params only at the first iteration
+                self._experiment_tracking.log_params(env.params)
+
+        if self.log_metrics:
+            super().__call__(env)
+            for dataset_name, metrics in self.eval_result.items():
+                for metric_name, log in metrics.items():
+                    metric_key = dataset_name + ":" + metric_name
+                    self._experiment_tracking.log_metric(key=metric_key, value=log[-1], step=env.iteration)
+
+        if self.log_model:
+            if env.iteration == env.end_iteration - 1:  # Log model only at the last iteration
+                if self.model_signature:
+                    model_name = self.model_name or self._experiment_tracking._get_or_set_experiment().name + "_model"
+                    self._experiment_tracking.log_model(  # type: ignore[call-arg]
+                        model=env.model,
+                        model_name=model_name,
+                        signatures={"predict": self.model_signature},
+                    )
+                else:
+                    warn(
+                        "Model will not be logged because model signature is missing. To autolog the model, "
+                        "please specify `model_signature` when constructing SnowflakeLightgbmCallback."
+                    )
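For context, a plausible end-to-end wiring of the new callback. The callback's import path and the pre-built `exp` tracker are assumptions (the diff does not show file names); `infer_signature` is the existing helper from `snowflake.ml.model.model_signature`:

    import lightgbm as lgb
    from sklearn.datasets import make_regression

    from snowflake.ml.experiment.callback.lightgbm import SnowflakeLightgbmCallback  # assumed module path
    from snowflake.ml.model.model_signature import infer_signature

    # Assumed: `exp` is an already-constructed ExperimentTracking instance; its
    # constructor arguments (session, database, schema) are outside this diff.
    exp = ...

    X, y = make_regression(n_samples=200, n_features=5)
    train_data = lgb.Dataset(X, label=y)

    callback = SnowflakeLightgbmCallback(
        experiment_tracking=exp,
        model_name="LGBM_DEMO_MODEL",  # placeholder name
        model_signature=infer_signature(input_data=X, output_data=y),
    )

    booster = lgb.train(
        params={"objective": "regression"},
        train_set=train_data,
        valid_sets=[train_data],
        valid_names=["train"],
        callbacks=[callback],  # params at first iteration, metrics each iteration, model at last
    )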
@@ -0,0 +1,63 @@
+import json
+from typing import TYPE_CHECKING, Any, Optional
+from warnings import warn
+
+import xgboost as xgb
+
+from snowflake.ml.experiment import utils
+
+if TYPE_CHECKING:
+    from snowflake.ml.experiment.experiment_tracking import ExperimentTracking
+    from snowflake.ml.model.model_signature import ModelSignature
+
+
+class SnowflakeXgboostCallback(xgb.callback.TrainingCallback):
+    def __init__(
+        self,
+        experiment_tracking: "ExperimentTracking",
+        log_model: bool = True,
+        log_metrics: bool = True,
+        log_params: bool = True,
+        model_name: Optional[str] = None,
+        model_signature: Optional["ModelSignature"] = None,
+    ) -> None:
+        self._experiment_tracking = experiment_tracking
+        self.log_model = log_model
+        self.log_metrics = log_metrics
+        self.log_params = log_params
+        self.model_name = model_name
+        self.model_signature = model_signature
+
+    def before_training(self, model: xgb.Booster) -> xgb.Booster:
+        if self.log_params:
+            params = json.loads(model.save_config())
+            self._experiment_tracking.log_params(utils.flatten_nested_params(params))
+
+        return model
+
+    def after_iteration(self, model: Any, epoch: int, evals_log: dict[str, dict[str, Any]]) -> bool:
+        if self.log_metrics:
+            for dataset_name, metrics in evals_log.items():
+                for metric_name, log in metrics.items():
+                    metric_key = dataset_name + ":" + metric_name
+                    self._experiment_tracking.log_metric(key=metric_key, value=log[-1], step=epoch)
+
+        return False
+
+    def after_training(self, model: xgb.Booster) -> xgb.Booster:
+        if self.log_model:
+            if not self.model_signature:
+                warn(
+                    "Model will not be logged because model signature is missing. "
+                    "To autolog the model, please specify `model_signature` when constructing SnowflakeXgboostCallback."
+                )
+                return model
+
+            model_name = self.model_name or self._experiment_tracking._get_or_set_experiment().name + "_model"
+            self._experiment_tracking.log_model(  # type: ignore[call-arg]
+                model=model,
+                model_name=model_name,
+                signatures={"predict": self.model_signature},
+            )
+
+        return model
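The XGBoost variant spreads the same work across `before_training` (params), `after_iteration` (metrics), and `after_training` (model). Wiring mirrors the LightGBM example above; the import path is again an assumption:

    import xgboost as xgb

    from snowflake.ml.experiment.callback.xgboost import SnowflakeXgboostCallback  # assumed module path
    from snowflake.ml.model.model_signature import infer_signature

    dtrain = xgb.DMatrix(X, label=y)  # X, y and `exp` as in the previous example
    booster = xgb.train(
        params={"objective": "reg:squarederror"},
        dtrain=dtrain,
        evals=[(dtrain, "train")],
        callbacks=[
            SnowflakeXgboostCallback(
                experiment_tracking=exp,  # same assumed ExperimentTracking instance
                model_name="XGB_DEMO_MODEL",  # placeholder name
                model_signature=infer_signature(input_data=X, output_data=y),
            )
        ],
    )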
@@ -0,0 +1,14 @@
+from typing import Any, Union
+
+
+def flatten_nested_params(params: Union[list[Any], dict[str, Any]], prefix: str = "") -> dict[str, Any]:
+    flat_params = {}
+    items = params.items() if isinstance(params, dict) else enumerate(params)
+    for key, value in items:
+        key = str(key).replace(".", "_")  # Replace dots in keys to avoid collisions involving nested keys
+        new_prefix = f"{prefix}.{key}" if prefix else key
+        if isinstance(value, (dict, list)):
+            flat_params.update(flatten_nested_params(value, new_prefix))
+        else:
+            flat_params[new_prefix] = value
+    return flat_params
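A quick worked example of this helper: dots inside keys are first rewritten to underscores, then nesting (including list indices) is joined with dots:

    config = {"learner": {"eta": 0.3, "updater": ["grow_histmaker", "prune"]}}
    flatten_nested_params(config)
    # {'learner.eta': 0.3, 'learner.updater.0': 'grow_histmaker', 'learner.updater.1': 'prune'}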
@@ -63,6 +63,13 @@ _STARTUP_SCRIPT_CODE = textwrap.dedent(
 
    ##### Set up Python environment #####
    export PYTHONPATH=/opt/env/site-packages/
+    MLRS_SYSTEM_REQUIREMENTS_FILE=${{MLRS_SYSTEM_REQUIREMENTS_FILE:-"${{SYSTEM_DIR}}/requirements.txt"}}
+
+    if [ -f "${{MLRS_SYSTEM_REQUIREMENTS_FILE}}" ]; then
+        echo "Installing packages from $MLRS_SYSTEM_REQUIREMENTS_FILE"
+        pip install -r $MLRS_SYSTEM_REQUIREMENTS_FILE
+    fi
+
    MLRS_REQUIREMENTS_FILE=${{MLRS_REQUIREMENTS_FILE:-"requirements.txt"}}
    if [ -f "${{MLRS_REQUIREMENTS_FILE}}" ]; then
        # TODO: Prevent collisions with MLRS packages using virtualenvs
@@ -454,8 +461,6 @@ class JobPayload:
                 overwrite=True,
             )
             source = Path(entrypoint.file_path.parent)
-            if not any(r.startswith("cloudpickle") for r in pip_requirements):
-                pip_requirements.append(f"cloudpickle~={version.parse(cp.__version__).major}.0")
 
         elif isinstance(source, stage_utils.StagePath):
             # copy payload to stage
@@ -470,19 +475,20 @@ class JobPayload:
 
         upload_payloads(session, app_stage_path, *additional_payload_specs)
 
-        # Upload requirements to app/ directory
-        # TODO: Check if payload includes both a requirements.txt file and pip_requirements
+        if not any(r.startswith("cloudpickle") for r in pip_requirements):
+            pip_requirements.append(f"cloudpickle~={version.parse(cp.__version__).major}.0")
+
+        # Upload system scripts and requirements.txt generated by pip_requirements to system/ directory
+        system_stage_path = stage_path.joinpath(constants.SYSTEM_STAGE_SUBPATH)
         if pip_requirements:
             # Upload requirements.txt to stage
             session.file.put_stream(
                 io.BytesIO("\n".join(pip_requirements).encode()),
-                stage_location=app_stage_path.joinpath("requirements.txt").as_posix(),
+                stage_location=system_stage_path.joinpath("requirements.txt").as_posix(),
                 auto_compress=False,
                 overwrite=True,
             )
 
-        # Upload startup script to system/ directory within payload
-        system_stage_path = stage_path.joinpath(constants.SYSTEM_STAGE_SUBPATH)
         # TODO: Make sure payload does not include file with same name
         session.file.put_stream(
             io.BytesIO(_STARTUP_SCRIPT_CODE.encode()),
@@ -191,7 +191,7 @@ def wait_for_instances(
             logger.info(f"Minimum instance requirement met: {total_nodes} instances available after {elapsed:.1f}s")
             return
 
-        logger.debug(
+        logger.info(
             f"Waiting for instances: current_instances={total_nodes}, min_instances={min_instances}, "
             f"target_instances={target_instances}, elapsed={elapsed:.1f}s, next check in {current_interval:.1f}s"
         )
@@ -199,7 +199,7 @@
         current_interval = min(current_interval * 2, check_interval)  # Exponential backoff
 
     raise TimeoutError(
-        f"Timed out after {timeout}s waiting for {min_instances} instances, only " f"{total_nodes} available"
+        f"Timed out after {elapsed}s waiting for {min_instances} instances, only " f"{total_nodes} available"
     )
 
 
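For reference, the surrounding function polls with capped exponential backoff, which is why the corrected message reports the actual `elapsed` time rather than the nominal `timeout`. A minimal standalone sketch of that polling pattern (not the package's code):

    import time
    from typing import Callable

    def wait_until(predicate: Callable[[], bool], timeout: float = 300.0, check_interval: float = 30.0) -> None:
        """Poll `predicate` with capped exponential backoff until it returns True."""
        start = time.monotonic()
        interval = 1.0
        while (elapsed := time.monotonic() - start) < timeout:
            if predicate():
                return
            time.sleep(max(0.0, min(interval, timeout - elapsed)))
            interval = min(interval * 2, check_interval)  # exponential backoff, capped
        raise TimeoutError(f"Timed out after {elapsed:.1f}s")  # report actual elapsed time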
@@ -1,5 +1,4 @@
 import enum
-import logging
 import pathlib
 import tempfile
 import warnings
@@ -881,6 +880,7 @@ class ModelVersion(lineage_node.LineageNode):
 
         Raises:
             ValueError: Illegal external access integration arguments.
+            exceptions.SnowparkSQLException: if service already exists.
 
         Returns:
             If `block=True`, return result information about service creation from server.
@@ -891,16 +891,6 @@
             subproject=_TELEMETRY_SUBPROJECT,
         )
 
-        # Check root logger level and emit warning if needed
-        root_logger = logging.getLogger()
-        if root_logger.level in (logging.WARNING, logging.ERROR):
-            warnings.warn(
-                "Suppressing service logs. Set the log level to INFO if you would like "
-                "verbose service logs (e.g., logging.getLogger().setLevel(logging.INFO)).",
-                UserWarning,
-                stacklevel=2,
-            )
-
         if build_external_access_integration is not None:
             msg = (
                 "`build_external_access_integration` is deprecated. "
@@ -917,39 +907,60 @@
 
         service_db_id, service_schema_id, service_id = sql_identifier.parse_fully_qualified_name(service_name)
         image_repo_db_id, image_repo_schema_id, image_repo_id = sql_identifier.parse_fully_qualified_name(image_repo)
-        return self._service_ops.create_service(
-            database_name=None,
-            schema_name=None,
-            model_name=self._model_name,
-            version_name=self._version_name,
-            service_database_name=service_db_id,
-            service_schema_name=service_schema_id,
-            service_name=service_id,
-            image_build_compute_pool_name=(
-                sql_identifier.SqlIdentifier(image_build_compute_pool)
-                if image_build_compute_pool
-                else sql_identifier.SqlIdentifier(service_compute_pool)
-            ),
-            service_compute_pool_name=sql_identifier.SqlIdentifier(service_compute_pool),
-            image_repo_database_name=image_repo_db_id,
-            image_repo_schema_name=image_repo_schema_id,
-            image_repo_name=image_repo_id,
-            ingress_enabled=ingress_enabled,
-            max_instances=max_instances,
-            cpu_requests=cpu_requests,
-            memory_requests=memory_requests,
-            gpu_requests=gpu_requests,
-            num_workers=num_workers,
-            max_batch_rows=max_batch_rows,
-            force_rebuild=force_rebuild,
-            build_external_access_integrations=(
-                None
-                if build_external_access_integrations is None
-                else [sql_identifier.SqlIdentifier(eai) for eai in build_external_access_integrations]
-            ),
-            block=block,
-            statement_params=statement_params,
-        )
+
+        from snowflake.ml.model import event_handler
+        from snowflake.snowpark import exceptions
+
+        model_event_handler = event_handler.ModelEventHandler()
+
+        with model_event_handler.status("Creating model inference service", total=6, block=block) as status:
+            try:
+                result = self._service_ops.create_service(
+                    database_name=None,
+                    schema_name=None,
+                    model_name=self._model_name,
+                    version_name=self._version_name,
+                    service_database_name=service_db_id,
+                    service_schema_name=service_schema_id,
+                    service_name=service_id,
+                    image_build_compute_pool_name=(
+                        sql_identifier.SqlIdentifier(image_build_compute_pool)
+                        if image_build_compute_pool
+                        else sql_identifier.SqlIdentifier(service_compute_pool)
+                    ),
+                    service_compute_pool_name=sql_identifier.SqlIdentifier(service_compute_pool),
+                    image_repo=image_repo,
+                    ingress_enabled=ingress_enabled,
+                    max_instances=max_instances,
+                    cpu_requests=cpu_requests,
+                    memory_requests=memory_requests,
+                    gpu_requests=gpu_requests,
+                    num_workers=num_workers,
+                    max_batch_rows=max_batch_rows,
+                    force_rebuild=force_rebuild,
+                    build_external_access_integrations=(
+                        None
+                        if build_external_access_integrations is None
+                        else [sql_identifier.SqlIdentifier(eai) for eai in build_external_access_integrations]
+                    ),
+                    block=block,
+                    statement_params=statement_params,
+                    progress_status=status,
+                )
+                status.update(label="Model service created successfully", state="complete", expanded=False)
+                return result
+            except exceptions.SnowparkSQLException as e:
+                # Check if the error is because the service already exists
+                if "already exists" in str(e).lower() or "100132" in str(
+                    e
+                ):  # 100132 is Snowflake error code for object already exists
+                    status.update("service already exists")
+                    status.complete()
+                    status.update(label="Service already exists", state="error", expanded=False)
+                    raise
+                else:
+                    status.update(label="Service creation failed", state="error", expanded=False)
+                    raise
 
     @telemetry.send_api_usage_telemetry(
         project=_TELEMETRY_PROJECT,
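From the caller's side the `create_service` signature is unchanged; the call now renders progress via the event handler, and the already-exists case is surfaced explicitly as a `SnowparkSQLException` (now documented in the Raises section above). A hedged usage sketch with placeholder identifiers (`mv` is a `ModelVersion`):

    from snowflake.snowpark import exceptions

    try:
        result = mv.create_service(
            service_name="MY_DB.MY_SCHEMA.MY_SERVICE",  # placeholder identifiers
            service_compute_pool="MY_COMPUTE_POOL",
            image_repo="MY_DB.MY_SCHEMA.MY_IMAGE_REPO",
            ingress_enabled=True,
            max_instances=1,
            block=True,
        )
    except exceptions.SnowparkSQLException:
        # Raised when the service already exists, among other SQL errors.
        raise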
@@ -1045,7 +1056,6 @@
         )
         target_function_info = self._get_function_info(function_name=function_name)
         job_db_id, job_schema_id, job_id = sql_identifier.parse_fully_qualified_name(job_name)
-        image_repo_db_id, image_repo_schema_id, image_repo_id = sql_identifier.parse_fully_qualified_name(image_repo)
         output_table_db_id, output_table_schema_id, output_table_id = sql_identifier.parse_fully_qualified_name(
             output_table_name
         )
@@ -1064,9 +1074,7 @@
             job_name=job_id,
             compute_pool_name=sql_identifier.SqlIdentifier(compute_pool),
             warehouse_name=sql_identifier.SqlIdentifier(warehouse),
-            image_repo_database_name=image_repo_db_id,
-            image_repo_schema_name=image_repo_schema_id,
-            image_repo_name=image_repo_id,
+            image_repo=image_repo,
             output_table_database_name=output_table_db_id,
             output_table_schema_name=output_table_schema_id,
             output_table_name=output_table_id,