PyPI - snowflake-ml-python - Versions diffs - 1.16.0__py3-none-any.whl → 1.18.0__py3-none-any.whl - Mend

snowflake-ml-python 1.16.0__py3-none-any.whl → 1.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

snowflake/ml/model/_client/model/model_version_impl.py CHANGED Viewed

@@ -7,6 +7,7 @@ from typing import Any, Callable, Optional, Union, overload
 import pandas as pd
+from snowflake import snowpark
 from snowflake.ml import jobs
 from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.utils import sql_identifier
@@ -19,7 +20,9 @@ from snowflake.ml.model._client.model import (
 from snowflake.ml.model._client.ops import metadata_ops, model_ops, service_ops
 from snowflake.ml.model._model_composer import model_composer
 from snowflake.ml.model._model_composer.model_manifest import model_manifest_schema
+from snowflake.ml.model._model_composer.model_method import utils as model_method_utils
 from snowflake.ml.model._packager.model_handlers import snowmlmodel
+from snowflake.ml.model._packager.model_meta import model_meta_schema
 from snowflake.snowpark import Session, async_job, dataframe
 _TELEMETRY_PROJECT = "MLOps"
@@ -41,6 +44,7 @@ class ModelVersion(lineage_node.LineageNode):
     _model_name: sql_identifier.SqlIdentifier
     _version_name: sql_identifier.SqlIdentifier
     _functions: list[model_manifest_schema.ModelFunctionInfo]
+    _model_spec: Optional[model_meta_schema.ModelMetadataDict]
     def __init__(self) -> None:
         raise RuntimeError("ModelVersion's initializer is not meant to be used. Use `version` from model instead.")
@@ -150,6 +154,7 @@ class ModelVersion(lineage_node.LineageNode):
         self._model_name = model_name
         self._version_name = version_name
         self._functions = self._get_functions()
+        self._model_spec = None
         super(cls, cls).__init__(
             self,
             session=model_ops._session,
@@ -437,6 +442,26 @@ class ModelVersion(lineage_node.LineageNode):
         """
         return self._functions
+    def _get_model_spec(self, statement_params: Optional[dict[str, Any]] = None) -> model_meta_schema.ModelMetadataDict:
+        """Fetch and cache the model spec for this model version.
+        Args:
+            statement_params: Optional dictionary of statement parameters to include
+                in the SQL command to fetch the model spec.
+        Returns:
+            The model spec as a dictionary for this model version.
+        """
+        if self._model_spec is None:
+            self._model_spec = self._model_ops._fetch_model_spec(
+                database_name=None,
+                schema_name=None,
+                model_name=self._model_name,
+                version_name=self._version_name,
+                statement_params=statement_params,
+            )
+        return self._model_spec
     @overload
     def run(
         self,
@@ -531,6 +556,8 @@ class ModelVersion(lineage_node.LineageNode):
                 statement_params=statement_params,
             )
         else:
+            explain_case_sensitive = self._determine_explain_case_sensitivity(target_function_info, statement_params)
             return self._model_ops.invoke_method(
                 method_name=sql_identifier.SqlIdentifier(target_function_info["name"]),
                 method_function_type=target_function_info["target_method_function_type"],
@@ -544,8 +571,20 @@ class ModelVersion(lineage_node.LineageNode):
                 partition_column=partition_column,
                 statement_params=statement_params,
                 is_partitioned=target_function_info["is_partitioned"],
+                explain_case_sensitive=explain_case_sensitive,
             )
+    def _determine_explain_case_sensitivity(
+        self,
+        target_function_info: model_manifest_schema.ModelFunctionInfo,
+        statement_params: Optional[dict[str, Any]] = None,
+    ) -> bool:
+        model_spec = self._get_model_spec(statement_params)
+        method_options = model_spec.get("method_options", {})
+        return model_method_utils.determine_explain_case_sensitive_from_method_options(
+            method_options, target_function_info["name"]
+        )
     @telemetry.send_api_usage_telemetry(
         project=_TELEMETRY_PROJECT,
         subproject=_TELEMETRY_SUBPROJECT,
@@ -555,7 +594,8 @@ class ModelVersion(lineage_node.LineageNode):
             "job_spec",
         ],
     )
-    def _run_batch(
+    @snowpark._internal.utils.private_preview(version="1.18.0")
+    def run_batch(
         self,
         *,
         compute_pool: str,
@@ -563,6 +603,68 @@ class ModelVersion(lineage_node.LineageNode):
         output_spec: batch_inference_specs.OutputSpec,
         job_spec: Optional[batch_inference_specs.JobSpec] = None,
     ) -> jobs.MLJob[Any]:
+        """Execute batch inference on datasets as an SPCS job.
+        Args:
+            compute_pool (str): Name of the compute pool to use for building the image containers and batch
+                inference execution.
+            input_spec (dataframe.DataFrame): Snowpark DataFrame containing the input data for inference.
+                The DataFrame should contain all required features for model prediction and passthrough columns.
+            output_spec (batch_inference_specs.OutputSpec): Configuration for where and how to save
+                the inference results. Specifies the stage location and file handling behavior.
+            job_spec (Optional[batch_inference_specs.JobSpec]): Optional configuration for job
+                execution parameters such as compute resources, worker counts, and job naming.
+                If None, default values will be used.
+        Returns:
+            jobs.MLJob[Any]: A batch inference job object that can be used to monitor progress and manage the job
+                lifecycle.
+        Raises:
+            ValueError: If warehouse is not set in job_spec and no current warehouse is available.
+            RuntimeError: If the input_spec cannot be processed or written to the staging location.
+        Example:
+            >>> # Prepare input data - Example 1: From a table
+            >>> input_df = session.table("my_input_table")
+            >>>
+            >>> # Prepare input data - Example 2: From a SQL query
+            >>> input_df = session.sql(
+            ...     "SELECT id, feature_1, feature_2 FROM feature_table WHERE feature_1 > 100"
+            ... )
+            >>>
+            >>> # Prepare input data - Example 3: From Parquet files in a stage
+            >>> input_df = session.read.option("pattern", ".*\\.parquet").parquet(
+            ...     "@my_stage/input_data/"
+            ... ).select("id", "feature_1", "feature_2")
+            >>>
+            >>> # Configure output location
+            >>> output_spec = OutputSpec(
+            ...     stage_location='@My_DB.PUBLIC.MY_STAGE/someth/path/',
+            ...     mode=SaveMode.OVERWRITE
+            ... )
+            >>>
+            >>> # Configure job parameters
+            >>> job_spec = JobSpec(
+            ...     job_name="my_batch_inference",
+            ...     num_workers=4,
+            ...     cpu_requests="2",
+            ...     memory_requests="8Gi"
+            ... )
+            >>>
+            >>> # Run batch inference
+            >>> job = model_version.run_batch(
+            ...     compute_pool="my_compute_pool",
+            ...     input_spec=input_df,
+            ...     output_spec=output_spec,
+            ...     job_spec=job_spec
+            ... )
+        Note:
+            This method is currently in private preview and requires snowflake-ml-python 1.18.0 or later.
+            The input data is temporarily stored in the output stage location under /_temporary before
+            inference execution.
+        """
         statement_params = telemetry.get_statement_params(
             project=_TELEMETRY_PROJECT,
             subproject=_TELEMETRY_SUBPROJECT,
@@ -789,6 +891,51 @@ class ModelVersion(lineage_node.LineageNode):
             version_name=sql_identifier.SqlIdentifier(version),
         )
+    def _can_run_on_gpu(
+        self,
+        statement_params: Optional[dict[str, Any]] = None,
+    ) -> bool:
+        """Check if the model has GPU runtime support.
+        Args:
+            statement_params: Optional dictionary of statement parameters to include
+                in the SQL command to fetch model spec.
+        Returns:
+            True if the model has GPU runtime configured, False otherwise.
+        """
+        # Fetch model spec
+        model_spec = self._get_model_spec(statement_params)
+        # Check if runtimes section exists and has gpu runtime
+        runtimes = model_spec.get("runtimes", {})
+        return "gpu" in runtimes
+    def _throw_error_if_gpu_is_not_supported(
+        self,
+        gpu_requests: Optional[Union[str, int]] = None,
+        statement_params: Optional[dict[str, Any]] = None,
+    ) -> None:
+        """Raise an error if GPU resources are requested but the model lacks GPU runtime support.
+        Args:
+            gpu_requests: The gpu limit for GPU based inference. Can be integer, fractional or string values. Use CPU
+                if None.
+            statement_params: Optional dictionary of statement parameters to include
+                in the SQL command to fetch model spec.
+        Raises:
+            ValueError: If the model does not have GPU runtime support.
+        """
+        if gpu_requests is not None and not self._can_run_on_gpu(statement_params):
+            raise ValueError(
+                f"GPU resources requested (gpu_requests={gpu_requests}), but the model "
+                f"{self.fully_qualified_model_name} version {self.version_name} does not have GPU runtime support. "
+                "Please ensure the model was logged with GPU runtime configuration or do not provide gpu_requests. "
+                "To log the model with GPU runtime configuration, provide `cuda_version` in the `options` while calling"
+                " the `log_model` function."
+            )
     def _check_huggingface_text_generation_model(
         self,
         statement_params: Optional[dict[str, Any]] = None,
@@ -803,13 +950,7 @@ class ModelVersion(lineage_node.LineageNode):
             ValueError: If the model is not a HuggingFace text-generation model.
         """
         # Fetch model spec
-        model_spec = self._model_ops._fetch_model_spec(
-            database_name=None,
-            schema_name=None,
-            model_name=self._model_name,
-            version_name=self._version_name,
-            statement_params=statement_params,
-        )
+        model_spec = self._get_model_spec(statement_params)
         # Check if model_type is huggingface_pipeline
         model_type = model_spec.get("model_type")
@@ -894,9 +1035,10 @@ class ModelVersion(lineage_node.LineageNode):
                 When it is ``False``, this function executes the underlying service creation asynchronously
                 and returns an :class:`AsyncJob`.
             experimental_options: Experimental options for the service creation with custom inference engine.
-                Currently, only `inference_engine` and `inference_engine_args_override` are supported.
+                Currently, `inference_engine`, `inference_engine_args_override`, and `autocapture` are supported.
                 `inference_engine` is the name of the inference engine to use.
                 `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
+                `autocapture` is a boolean to enable/disable inference table.
         """
         ...
@@ -952,9 +1094,10 @@ class ModelVersion(lineage_node.LineageNode):
                 When it is ``False``, this function executes the underlying service creation asynchronously
                 and returns an :class:`AsyncJob`.
             experimental_options: Experimental options for the service creation with custom inference engine.
-                Currently, only `inference_engine` and `inference_engine_args_override` are supported.
+                Currently, `inference_engine`, `inference_engine_args_override`, and `autocapture` are supported.
                 `inference_engine` is the name of the inference engine to use.
                 `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
+                `autocapture` is a boolean to enable/disable inference table.
         """
         ...
@@ -1027,21 +1170,20 @@ class ModelVersion(lineage_node.LineageNode):
                 When it is False, this function executes the underlying service creation asynchronously
                 and returns an AsyncJob.
             experimental_options: Experimental options for the service creation with custom inference engine.
-                Currently, only `inference_engine` and `inference_engine_args_override` are supported.
+                Currently, `inference_engine`, `inference_engine_args_override`, and `autocapture` are supported.
                 `inference_engine` is the name of the inference engine to use.
                 `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
+                `autocapture` is a boolean to enable/disable inference table.
         Raises:
-            ValueError: Illegal external access integration arguments.
+            ValueError: Illegal external access integration arguments, or if GPU resources are requested
+                but the model does not have GPU runtime support.
             exceptions.SnowparkSQLException: if service already exists.
         Returns:
             If `block=True`, return result information about service creation from server.
             Otherwise, return the service creation AsyncJob.
-        Raises:
-            ValueError: Illegal external access integration arguments.
         """
         statement_params = telemetry.get_statement_params(
             project=_TELEMETRY_PROJECT,
@@ -1064,12 +1206,16 @@ class ModelVersion(lineage_node.LineageNode):
         service_db_id, service_schema_id, service_id = sql_identifier.parse_fully_qualified_name(service_name)
-        # Check if model is HuggingFace text-generation before doing inference engine checks
-        if experimental_options:
-            self._check_huggingface_text_generation_model(statement_params)
+        # Validate GPU support if GPU resources are requested
+        self._throw_error_if_gpu_is_not_supported(gpu_requests, statement_params)
         inference_engine_args = inference_engine_utils._get_inference_engine_args(experimental_options)
+        # Check if model is HuggingFace text-generation before doing inference engine checks
+        # Only validate if inference engine is actually specified
+        if inference_engine_args is not None:
+            self._check_huggingface_text_generation_model(statement_params)
         # Enrich inference engine args if inference engine is specified
         if inference_engine_args is not None:
             inference_engine_args = inference_engine_utils._enrich_inference_engine_args(
@@ -1077,6 +1223,9 @@ class ModelVersion(lineage_node.LineageNode):
                 gpu_requests,
             )
+        # Extract autocapture from experimental_options
+        autocapture = experimental_options.get("autocapture") if experimental_options else None
         from snowflake.ml.model import event_handler
         from snowflake.snowpark import exceptions
@@ -1116,6 +1265,7 @@ class ModelVersion(lineage_node.LineageNode):
                     statement_params=statement_params,
                     progress_status=status,
                     inference_engine_args=inference_engine_args,
+                    autocapture=autocapture,
                 )
                 status.update(label="Model service created successfully", state="complete", expanded=False)
                 return result

snowflake/ml/model/_client/ops/model_ops.py CHANGED Viewed

@@ -952,6 +952,7 @@ class ModelOperator:
         partition_column: Optional[sql_identifier.SqlIdentifier] = None,
         statement_params: Optional[dict[str, str]] = None,
         is_partitioned: Optional[bool] = None,
+        explain_case_sensitive: bool = False,
     ) -> Union[type_hints.SupportedDataType, dataframe.DataFrame]:
         ...
@@ -967,6 +968,7 @@ class ModelOperator:
         service_name: sql_identifier.SqlIdentifier,
         strict_input_validation: bool = False,
         statement_params: Optional[dict[str, str]] = None,
+        explain_case_sensitive: bool = False,
     ) -> Union[type_hints.SupportedDataType, dataframe.DataFrame]:
         ...
@@ -986,6 +988,7 @@ class ModelOperator:
         partition_column: Optional[sql_identifier.SqlIdentifier] = None,
         statement_params: Optional[dict[str, str]] = None,
         is_partitioned: Optional[bool] = None,
+        explain_case_sensitive: bool = False,
     ) -> Union[type_hints.SupportedDataType, dataframe.DataFrame]:
         identifier_rule = model_signature.SnowparkIdentifierRule.INFERRED
@@ -1068,6 +1071,7 @@ class ModelOperator:
                     version_name=version_name,
                     statement_params=statement_params,
                     is_partitioned=is_partitioned or False,
+                    explain_case_sensitive=explain_case_sensitive,
                 )
         if keep_order:

snowflake/ml/model/_client/ops/service_ops.py CHANGED Viewed

@@ -206,6 +206,8 @@ class ServiceOperator:
         hf_model_args: Optional[HFModelArgs] = None,
         # inference engine model
         inference_engine_args: Optional[InferenceEngineArgs] = None,
+        # inference table
+        autocapture: Optional[bool] = None,
     ) -> Union[str, async_job.AsyncJob]:
         # Generate operation ID for this deployment
@@ -261,6 +263,7 @@ class ServiceOperator:
             gpu=gpu_requests,
             num_workers=num_workers,
             max_batch_rows=max_batch_rows,
+            autocapture=autocapture,
         )
         if hf_model_args:
             # hf model

snowflake/ml/model/_client/service/model_deployment_spec.py CHANGED Viewed

@@ -146,6 +146,7 @@ class ModelDeploymentSpec:
         gpu: Optional[Union[str, int]] = None,
         num_workers: Optional[int] = None,
         max_batch_rows: Optional[int] = None,
+        autocapture: Optional[bool] = None,
     ) -> "ModelDeploymentSpec":
         """Add service specification to the deployment spec.
@@ -161,6 +162,7 @@ class ModelDeploymentSpec:
             gpu: GPU requirement.
             num_workers: Number of workers.
             max_batch_rows: Maximum batch rows for inference.
+            autocapture: Whether to enable inference table.
         Raises:
             ValueError: If a job spec already exists.
@@ -186,6 +188,7 @@ class ModelDeploymentSpec:
             compute_pool=inference_compute_pool_name.identifier(),
             ingress_enabled=ingress_enabled,
             max_instances=max_instances,
+            autocapture=autocapture,
             **self._inference_spec,
         )
         return self

snowflake/ml/model/_client/service/model_deployment_spec_schema.py CHANGED Viewed

@@ -32,6 +32,7 @@ class Service(BaseModel):
     gpu: Optional[str] = None
     num_workers: Optional[int] = None
     max_batch_rows: Optional[int] = None
+    autocapture: Optional[bool] = None
     inference_engine_spec: Optional[InferenceEngineSpec] = None

snowflake/ml/model/_client/sql/model_version.py CHANGED Viewed

@@ -438,6 +438,7 @@ class ModelVersionSQLClient(_base._BaseSQLClient):
         partition_column: Optional[sql_identifier.SqlIdentifier],
         statement_params: Optional[dict[str, Any]] = None,
         is_partitioned: bool = True,
+        explain_case_sensitive: bool = False,
     ) -> dataframe.DataFrame:
         with_statements = []
         if len(input_df.queries["queries"]) == 1 and len(input_df.queries["post_actions"]) == 0:
@@ -505,7 +506,8 @@ class ModelVersionSQLClient(_base._BaseSQLClient):
         cols_to_drop = []
         for output_name, output_type, output_col_name in returns:
-            output_identifier = sql_identifier.SqlIdentifier(output_name).identifier()
+            case_sensitive = "explain" in method_name.resolved().lower() and explain_case_sensitive
+            output_identifier = sql_identifier.SqlIdentifier(output_name, case_sensitive=case_sensitive).identifier()
             if output_identifier != output_col_name:
                 cols_to_drop.append(output_identifier)
             output_cols.append(F.col(output_identifier).astype(output_type))

snowflake/ml/model/_model_composer/model_manifest/model_manifest.py CHANGED Viewed

@@ -87,7 +87,9 @@ class ModelManifest:
                     model_meta_schema.FunctionProperties.PARTITIONED.value, False
                 ),
                 wide_input=len(model_meta.signatures[target_method].inputs) > constants.SNOWPARK_UDF_INPUT_COL_LIMIT,
-                options=model_method.get_model_method_options_from_options(options, target_method),
+                options=model_method.get_model_method_options_from_options(
+                    options, target_method, model_meta.model_type
+                ),
             )
             self.methods.append(method)

snowflake/ml/model/_model_composer/model_method/model_method.py CHANGED Viewed

@@ -11,6 +11,7 @@ from snowflake.ml.model._model_composer.model_manifest import model_manifest_sch
 from snowflake.ml.model._model_composer.model_method import (
     constants,
     function_generator,
+    utils,
 )
 from snowflake.ml.model._packager.model_meta import model_meta as model_meta_api
 from snowflake.ml.model.volatility import Volatility
@@ -31,12 +32,19 @@ class ModelMethodOptions(TypedDict):
 def get_model_method_options_from_options(
-    options: type_hints.ModelSaveOption, target_method: str
+    options: type_hints.ModelSaveOption, target_method: str, model_type: Optional[str] = None
 ) -> ModelMethodOptions:
     default_function_type = model_manifest_schema.ModelMethodFunctionTypes.FUNCTION.value
+    method_option = options.get("method_options", {}).get(target_method, {})
+    case_sensitive = method_option.get("case_sensitive", False)
     if target_method == "explain":
         default_function_type = model_manifest_schema.ModelMethodFunctionTypes.TABLE_FUNCTION.value
-    method_option = options.get("method_options", {}).get(target_method, {})
+        case_sensitive = utils.determine_explain_case_sensitive_from_method_options(
+            options.get("method_options", {}), target_method
+        )
+    elif model_type == "prophet":
+        # Prophet models always require TABLE_FUNCTION because they need entire time series context
+        default_function_type = model_manifest_schema.ModelMethodFunctionTypes.TABLE_FUNCTION.value
     global_function_type = options.get("function_type", default_function_type)
     function_type = method_option.get("function_type", global_function_type)
     if function_type not in [function_type.value for function_type in model_manifest_schema.ModelMethodFunctionTypes]:
@@ -48,7 +56,7 @@ def get_model_method_options_from_options(
     # Only include volatility if explicitly provided in method options
     result: ModelMethodOptions = ModelMethodOptions(
-        case_sensitive=method_option.get("case_sensitive", False),
+        case_sensitive=case_sensitive,
         function_type=function_type,
     )
     if resolved_volatility:

snowflake/ml/model/_model_composer/model_method/utils.py ADDED Viewed

@@ -0,0 +1,28 @@
+from __future__ import annotations
+from typing import Any, Mapping, Optional
+def determine_explain_case_sensitive_from_method_options(
+    method_options: Mapping[str, Optional[Mapping[str, Any]]],
+    target_method: str,
+) -> bool:
+    """Determine explain method case sensitivity from related predict methods.
+    Args:
+        method_options: Mapping from method name to its options. Each option may
+            contain ``"case_sensitive"`` to indicate SQL identifier sensitivity.
+        target_method: The target method name being resolved (e.g., an ``explain_*``
+            method).
+    Returns:
+        True if the explain method should be treated as case sensitive; otherwise False.
+    """
+    if "explain" not in target_method:
+        return False
+    predict_priority_methods = ["predict_proba", "predict", "predict_log_proba"]
+    for src_method in predict_priority_methods:
+        src_opts = method_options.get(src_method)
+        if src_opts is not None:
+            return bool(src_opts.get("case_sensitive", False))
+    return False

snowflake/ml/model/_packager/model_env/model_env.py CHANGED Viewed

@@ -240,14 +240,31 @@ class ModelEnv:
                 self._conda_dependencies[channel].remove(spec)
     def generate_env_for_cuda(self) -> None:
+        # Insert py-xgboost-gpu only for XGBoost versions < 3.0.0
         xgboost_spec = env_utils.find_dep_spec(
-            self._conda_dependencies, self._pip_requirements, conda_pkg_name="xgboost", remove_spec=True
+            self._conda_dependencies, self._pip_requirements, conda_pkg_name="xgboost", remove_spec=False
         )
         if xgboost_spec:
-            self.include_if_absent(
-                [ModelDependency(requirement=f"py-xgboost-gpu{xgboost_spec.specifier}", pip_name="xgboost")],
-                check_local_version=False,
-            )
+            # Only handle specifiers with an explicit version bound (==, ===, >, >=). Insert the GPU variant
+            # iff the major version of the first such bound encountered is < 3.
+            pinned_major: Optional[int] = None
+            for spec in xgboost_spec.specifier:
+                if spec.operator in ("==", "===", ">", ">="):
+                    try:
+                        pinned_major = version.parse(spec.version).major
+                    except version.InvalidVersion:
+                        pinned_major = None
+                    break
+            if pinned_major is not None and pinned_major < 3:
+                xgboost_spec = env_utils.find_dep_spec(
+                    self._conda_dependencies, self._pip_requirements, conda_pkg_name="xgboost", remove_spec=True
+                )
+                if xgboost_spec:
+                    self.include_if_absent(
+                        [ModelDependency(requirement=f"py-xgboost-gpu{xgboost_spec.specifier}", pip_name="xgboost")],
+                        check_local_version=False,
+                    )
         tf_spec = env_utils.find_dep_spec(
             self._conda_dependencies, self._pip_requirements, conda_pkg_name="tensorflow", remove_spec=True

snowflake/ml/model/_packager/model_handlers/_utils.py CHANGED Viewed

@@ -305,3 +305,73 @@ def get_default_cuda_version() -> str:
         return torch.version.cuda or model_env.DEFAULT_CUDA_VERSION
     return model_env.DEFAULT_CUDA_VERSION
+def normalize_column_name(column_name: str) -> str:
+    """Normalize a column name to be a valid unquoted Snowflake SQL identifier.
+    Converts column names with spaces and special characters (e.g., "Christmas Day")
+    into valid lowercase unquoted SQL identifiers (e.g., "christmas_day") that can be used
+    without quotes in SQL queries. This follows Snowflake's unquoted identifier rules:
+    https://docs.snowflake.com/en/sql-reference/identifiers-syntax
+    The normalization approach is preferred over quoted identifiers because:
+    - Unquoted identifiers are simpler and more readable
+    - They don't require special handling in SQL contexts
+    - They avoid case-sensitivity complications
+    - Lowercase convention improves readability and follows Python/pandas conventions
+    This utility is useful for model handlers that need to ensure output column names
+    from models (e.g., Prophet holiday columns, feature names) are SQL-safe.
+    Args:
+        column_name: Original column name (may contain spaces, special chars, etc.)
+    Returns:
+        Normalized lowercase column name that is a valid unquoted SQL identifier matching
+        the pattern [a-z_][a-z0-9_$]*
+    Examples:
+        >>> normalize_column_name("Christmas Day")
+        'christmas_day'
+        >>> normalize_column_name("New Year's Day")
+        'new_year_s_day'
+        >>> normalize_column_name("__private")
+        '__private'
+        >>> normalize_column_name("2023_data")
+        '_2023_data'
+    """
+    import re
+    # Convert to lowercase for readability and consistency
+    normalized = column_name.lower()
+    # Replace spaces and special characters with underscores
+    # Keep only alphanumeric characters, underscores, and dollar signs (valid in unquoted identifiers)
+    normalized = re.sub(r"[^a-z0-9_$]", "_", normalized)
+    # Collapse consecutive underscores while preserving leading underscores
+    # This handles cases like "A  B" → "a__b" → "a_b" while preserving "__name" → "__name"
+    if len(normalized) > 1:
+        # Count and preserve leading underscores, collapse the rest
+        leading_underscores = len(normalized) - len(normalized.lstrip("_"))
+        rest_of_string = normalized[leading_underscores:]
+        rest_collapsed = re.sub(r"_+", "_", rest_of_string)
+        normalized = "_" * leading_underscores + rest_collapsed
+    if normalized == "_":
+        return "_"
+    normalized = normalized.rstrip("_")
+    # Ensure it starts with a letter or underscore (SQL requirement)
+    # Unquoted identifiers must match: [a-z_][a-z0-9_$]*
+    if normalized and normalized[0].isdigit():
+        normalized = "_" + normalized
+    # If normalization resulted in empty string, use a default
+    # (This happens when input was only underscores like "___")
+    if not normalized:
+        normalized = "column"
+    return normalized

snowflake-ml-python 1.16.0__py3-none-any.whl → 1.18.0__py3-none-any.whl

snowflake-ml-python 1.16.0__py3-none-any.whl → 1.18.0__py3-none-any.whl