PyPI - snowflake-ml-python - Versions diffs - 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl - Mend

snowflake-ml-python 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (322) hide show

snowflake/ml/_internal/utils/identifier.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import re
-from typing import Any, List, Optional, Tuple, Union, overload
+from typing import Any, Optional, Union, overload
 from snowflake.snowpark._internal.analyzer import analyzer_utils
@@ -12,7 +12,7 @@ SF_IDENTIFIER_RE = re.compile(_SF_IDENTIFIER)
 _SF_SCHEMA_LEVEL_OBJECT = (
     rf"(?:(?:(?P<db>{_SF_IDENTIFIER})\.)?(?P<schema>{_SF_IDENTIFIER})\.)?(?P<object>{_SF_IDENTIFIER})"
 )
-_SF_STAGE_PATH = rf"{_SF_SCHEMA_LEVEL_OBJECT}(?P<path>.*)"
+_SF_STAGE_PATH = rf"@?{_SF_SCHEMA_LEVEL_OBJECT}(?P<path>/.*)?"
 _SF_SCHEMA_LEVEL_OBJECT_RE = re.compile(_SF_SCHEMA_LEVEL_OBJECT)
 _SF_STAGE_PATH_RE = re.compile(_SF_STAGE_PATH)
@@ -112,7 +112,7 @@ def get_inferred_name(name: str) -> str:
     return escaped_id
-def concat_names(names: List[str]) -> str:
+def concat_names(names: list[str]) -> str:
     """Concatenates `names` to form one valid id.
@@ -142,7 +142,7 @@ def rename_to_valid_snowflake_identifier(name: str) -> str:
 def parse_schema_level_object_identifier(
     object_name: str,
-) -> Tuple[Union[str, Any], Union[str, Any], Union[str, Any]]:
+) -> tuple[Union[str, Any], Union[str, Any], Union[str, Any]]:
     """Parse a string which starts with schema level object.
     Args:
@@ -172,7 +172,7 @@ def parse_schema_level_object_identifier(
 def parse_snowflake_stage_path(
     path: str,
-) -> Tuple[Union[str, Any], Union[str, Any], Union[str, Any], Union[str, Any]]:
+) -> tuple[Union[str, Any], Union[str, Any], Union[str, Any], Union[str, Any]]:
     """Parse a string which represents a snowflake stage path.
     Args:
@@ -197,7 +197,7 @@ def parse_snowflake_stage_path(
         res.group("db"),
         res.group("schema"),
         res.group("object"),
-        res.group("path"),
+        res.group("path") or "",
     )
@@ -260,11 +260,11 @@ def get_unescaped_names(ids: str) -> str:
 @overload
-def get_unescaped_names(ids: List[str]) -> List[str]:
+def get_unescaped_names(ids: list[str]) -> list[str]:
     ...
-def get_unescaped_names(ids: Optional[Union[str, List[str]]]) -> Optional[Union[str, List[str]]]:
+def get_unescaped_names(ids: Optional[Union[str, list[str]]]) -> Optional[Union[str, list[str]]]:
     """Given a user provided identifier(s), this method will compute the equivalent column name identifier(s) in the
     response pandas dataframe(i.e., in the response of snowpark_df.to_pandas()) using the rules defined here
     https://docs.snowflake.com/en/sql-reference/identifiers-syntax.
@@ -308,11 +308,11 @@ def get_inferred_names(names: str) -> str:
 @overload
-def get_inferred_names(names: List[str]) -> List[str]:
+def get_inferred_names(names: list[str]) -> list[str]:
     ...
-def get_inferred_names(names: Optional[Union[str, List[str]]]) -> Optional[Union[str, List[str]]]:
+def get_inferred_names(names: Optional[Union[str, list[str]]]) -> Optional[Union[str, list[str]]]:
     """Given a user provided *string(s)*, this method will compute the equivalent column name identifier(s)
     in case of column name contains special characters, and maintains case-sensitivity
     https://docs.snowflake.com/en/sql-reference/identifiers-syntax.

snowflake/ml/_internal/utils/import_utils.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import importlib
-from typing import Any, Tuple
+from typing import Any
 class MissingOptionalDependency:
@@ -46,7 +46,7 @@ def import_with_fallbacks(*targets: str) -> Any:
     raise ImportError(f"None of the requested targets could be imported. Requested: {', '.join(targets)}")
-def import_or_get_dummy(target: str) -> Tuple[Any, bool]:
+def import_or_get_dummy(target: str) -> tuple[Any, bool]:
     """Try to import the the given target or return a dummy object.
     If the import target (package/module/symbol) is available, the target will be returned. If it is not available,

snowflake/ml/_internal/utils/parallelize.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import math
 from contextlib import contextmanager
 from timeit import default_timer
-from typing import Any, Callable, Dict, Generator, Iterable, List, Optional
+from typing import Any, Callable, Generator, Iterable, Optional
 import snowflake.snowpark.functions as F
 from snowflake import snowpark
@@ -17,17 +17,17 @@ def timer() -> Generator[Callable[[], float], None, None]:
     yield lambda: elapser()
-def _flatten(L: Iterable[List[Any]]) -> List[Any]:
+def _flatten(L: Iterable[list[Any]]) -> list[Any]:
     return [val for sublist in L for val in sublist]
 def map_dataframe_by_column(
     df: snowpark.DataFrame,
-    cols: List[str],
-    map_func: Callable[[snowpark.DataFrame, List[str]], snowpark.DataFrame],
+    cols: list[str],
+    map_func: Callable[[snowpark.DataFrame, list[str]], snowpark.DataFrame],
     partition_size: int,
-    statement_params: Optional[Dict[str, Any]] = None,
-) -> List[List[Any]]:
+    statement_params: Optional[dict[str, Any]] = None,
+) -> list[list[Any]]:
     """Applies the `map_func` to the input DataFrame by parallelizing it over subsets of the column.
     Because the return results are materialized as Python lists *in memory*, this method should
@@ -84,7 +84,7 @@ def map_dataframe_by_column(
             unioned_df = mapped_df if unioned_df is None else unioned_df.union_all(mapped_df)
     # Store results in a list of size |n_partitions| x |n_rows| x |n_output_cols|
-    all_results: List[List[List[Any]]] = [[] for _ in range(n_partitions - 1)]
+    all_results: list[list[list[Any]]] = [[] for _ in range(n_partitions - 1)]
     # Collect the results of the first n-1 partitions, removing the partition_id column
     unioned_result = unioned_df.collect(statement_params=statement_params) if unioned_df is not None else []

snowflake/ml/_internal/utils/pkg_version_utils.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import sys
 import warnings
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Optional, Union
 from packaging.version import Version
@@ -8,7 +8,7 @@ from snowflake.ml._internal import telemetry
 from snowflake.snowpark import AsyncJob, Row, Session
 from snowflake.snowpark._internal import utils as snowpark_utils
-cache: Dict[str, Optional[str]] = {}
+cache: dict[str, Optional[str]] = {}
 _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "utils"
@@ -23,8 +23,8 @@ def is_relaxed() -> bool:
 def get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-    pkg_versions: List[str], session: Session, subproject: Optional[str] = None
-) -> List[str]:
+    pkg_versions: list[str], session: Session, subproject: Optional[str] = None
+) -> list[str]:
     if snowpark_utils.is_in_stored_procedure():  # type: ignore[no-untyped-call]
         return pkg_versions
     else:
@@ -32,9 +32,9 @@ def get_valid_pkg_versions_supported_in_snowflake_conda_channel(
 def _get_valid_pkg_versions_supported_in_snowflake_conda_channel_async(
-    pkg_versions: List[str], session: Session, subproject: Optional[str] = None
-) -> List[str]:
-    pkg_version_async_job_list: List[Tuple[str, AsyncJob]] = []
+    pkg_versions: list[str], session: Session, subproject: Optional[str] = None
+) -> list[str]:
+    pkg_version_async_job_list: list[tuple[str, AsyncJob]] = []
     for pkg_version in pkg_versions:
         if pkg_version not in cache:
             # Execute pkg version queries asynchronously.
@@ -64,7 +64,7 @@ def _get_valid_pkg_versions_supported_in_snowflake_conda_channel_async(
 def _query_pkg_version_supported_in_snowflake_conda_channel(
     pkg_version: str, session: Session, block: bool, subproject: Optional[str] = None
-) -> Union[AsyncJob, List[Row]]:
+) -> Union[AsyncJob, list[Row]]:
     tokens = pkg_version.split("==")
     if len(tokens) != 2:
         raise RuntimeError(
@@ -102,9 +102,9 @@ def _query_pkg_version_supported_in_snowflake_conda_channel(
     return pkg_version_list_or_async_job
-def _get_conda_packages_and_emit_warnings(pkg_versions: List[str]) -> List[str]:
-    pkg_version_conda_list: List[str] = []
-    pkg_version_warning_list: List[List[str]] = []
+def _get_conda_packages_and_emit_warnings(pkg_versions: list[str]) -> list[str]:
+    pkg_version_conda_list: list[str] = []
+    pkg_version_warning_list: list[list[str]] = []
     for pkg_version in pkg_versions:
         try:
             conda_pkg_version = cache[pkg_version]

snowflake/ml/_internal/utils/query_result_checker.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from __future__ import annotations  # for return self methods
 from functools import partial
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Optional
 from snowflake import connector, snowpark
 from snowflake.ml._internal.utils import formatting
@@ -123,7 +123,7 @@ def cell_value_by_column_matcher(
     return True
-_DEFAULT_MATCHERS: List[Callable[[List[snowpark.Row], Optional[str]], bool]] = [
+_DEFAULT_MATCHERS: list[Callable[[list[snowpark.Row], Optional[str]], bool]] = [
     partial(result_dimension_matcher, 1, 1),
     partial(column_name_matcher, "status"),
 ]
@@ -252,12 +252,12 @@ class SqlResultValidator(ResultValidator):
     """
     def __init__(
-        self, session: snowpark.Session, query: str, statement_params: Optional[Dict[str, Any]] = None
+        self, session: snowpark.Session, query: str, statement_params: Optional[dict[str, Any]] = None
     ) -> None:
         self._session: snowpark.Session = session
         self._query: str = query
         self._success_matchers: list[Callable[[list[snowpark.Row], Optional[str]], bool]] = []
-        self._statement_params: Optional[Dict[str, Any]] = statement_params
+        self._statement_params: Optional[dict[str, Any]] = statement_params
     def _get_result(self) -> list[snowpark.Row]:
         """Collect the result of the given SQL query."""

snowflake/ml/_internal/utils/snowflake_env.py CHANGED Viewed

@@ -1,15 +1,15 @@
 import enum
-from typing import Any, Dict, Optional, TypedDict, cast
+from typing import Any, Optional, TypedDict, cast
 from packaging import version
 from typing_extensions import NotRequired, Required
 from snowflake.ml._internal.utils import query_result_checker
-from snowflake.snowpark import session
+from snowflake.snowpark import exceptions as sp_exceptions, session
 def get_current_snowflake_version(
-    sess: session.Session, *, statement_params: Optional[Dict[str, Any]] = None
+    sess: session.Session, *, statement_params: Optional[dict[str, Any]] = None
 ) -> version.Version:
     """Get Snowflake Version as a version.Version object follow PEP way of versioning, that is to say:
         "7.44.2 b202312132139364eb71238" to <Version('7.44.2+b202312132139364eb71238')>
@@ -60,8 +60,8 @@ class SnowflakeRegion(TypedDict):
 def get_regions(
-    sess: session.Session, *, statement_params: Optional[Dict[str, Any]] = None
-) -> Dict[str, SnowflakeRegion]:
+    sess: session.Session, *, statement_params: Optional[dict[str, Any]] = None
+) -> dict[str, SnowflakeRegion]:
     res = (
         query_result_checker.SqlResultValidator(sess, "SHOW REGIONS", statement_params=statement_params)
         .has_column("snowflake_region")
@@ -93,7 +93,7 @@ def get_regions(
     return res_dict
-def get_current_region_id(sess: session.Session, *, statement_params: Optional[Dict[str, Any]] = None) -> str:
+def get_current_region_id(sess: session.Session, *, statement_params: Optional[dict[str, Any]] = None) -> str:
     res = (
         query_result_checker.SqlResultValidator(
             sess, "SELECT CURRENT_REGION() AS CURRENT_REGION", statement_params=statement_params
@@ -103,3 +103,25 @@ def get_current_region_id(sess: session.Session, *, statement_params: Optional[D
     )
     return cast(str, res.CURRENT_REGION)
+def get_current_cloud(
+    sess: session.Session,
+    default: Optional[SnowflakeCloudType] = None,
+    *,
+    statement_params: Optional[dict[str, Any]] = None,
+) -> SnowflakeCloudType:
+    region_id = get_current_region_id(sess, statement_params=statement_params)
+    try:
+        region = get_regions(sess, statement_params=statement_params)[region_id]
+        return region["cloud"]
+    except sp_exceptions.SnowparkSQLException:
+        # SHOW REGIONS not available, try to infer cloud from region name
+        region_name = region_id.split(".", 1)[-1]  # Drop region group if any, e.g. PUBLIC
+        cloud_name_maybe = region_name.split("_", 1)[0]  # Extract cloud name, e.g. AWS_US_WEST -> AWS
+        try:
+            return SnowflakeCloudType.from_value(cloud_name_maybe)
+        except ValueError:
+            if default:
+                return default
+            raise

snowflake/ml/_internal/utils/snowpark_dataframe_utils.py CHANGED Viewed

@@ -1,13 +1,13 @@
 import logging
 import warnings
-from typing import List, Optional
+from typing import Optional
 from snowflake import snowpark
 from snowflake.ml._internal.utils import sql_identifier
 from snowflake.snowpark import functions, types
-def cast_snowpark_dataframe(df: snowpark.DataFrame, ignore_columns: Optional[List[str]] = None) -> snowpark.DataFrame:
+def cast_snowpark_dataframe(df: snowpark.DataFrame, ignore_columns: Optional[list[str]] = None) -> snowpark.DataFrame:
     """Cast columns in the dataframe to types that are compatible with tensor.
     It assists FileSet.make() in performing implicit data casting.

snowflake/ml/_internal/utils/sql_identifier.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Optional, Tuple, Union
+from typing import Optional, Union
 from snowflake.ml._internal.utils import identifier
@@ -77,13 +77,13 @@ class SqlIdentifier(str):
         return super().__hash__()
-def to_sql_identifiers(list_of_str: List[str], *, case_sensitive: bool = False) -> List[SqlIdentifier]:
+def to_sql_identifiers(list_of_str: list[str], *, case_sensitive: bool = False) -> list[SqlIdentifier]:
     return [SqlIdentifier(val, case_sensitive=case_sensitive) for val in list_of_str]
 def parse_fully_qualified_name(
     name: str,
-) -> Tuple[Optional[SqlIdentifier], Optional[SqlIdentifier], SqlIdentifier]:
+) -> tuple[Optional[SqlIdentifier], Optional[SqlIdentifier], SqlIdentifier]:
     db, schema, object = identifier.parse_schema_level_object_identifier(name)
     assert name is not None, f"Unable parse the input name `{name}` as fully qualified."

snowflake/ml/_internal/utils/table_manager.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Optional
 from snowflake import snowpark
 from snowflake.ml._internal.utils import formatting, identifier, query_result_checker
@@ -24,8 +24,8 @@ def create_single_table(
     database_name: str,
     schema_name: str,
     table_name: str,
-    table_schema: List[Tuple[str, str]],
-    statement_params: Optional[Dict[str, Any]] = None,
+    table_schema: list[tuple[str, str]],
+    statement_params: Optional[dict[str, Any]] = None,
 ) -> str:
     """Creates a single table for registry and returns the fully qualified name of the table.
@@ -55,7 +55,7 @@ def create_single_table(
     return fully_qualified_table_name
-def insert_table_entry(session: snowpark.Session, table: str, columns: Dict[str, Any]) -> List[snowpark.Row]:
+def insert_table_entry(session: snowpark.Session, table: str, columns: dict[str, Any]) -> list[snowpark.Row]:
     """Insert an entry into an internal Model Registry table.
     Args:
@@ -99,9 +99,9 @@ def validate_table_exist(session: snowpark.Session, table: str, qualified_schema
     return len(tables) == 1
-def get_table_schema(session: snowpark.Session, table_name: str, qualified_schema_name: str) -> Dict[str, str]:
+def get_table_schema(session: snowpark.Session, table_name: str, qualified_schema_name: str) -> dict[str, str]:
     result = session.sql(f"DESC TABLE {qualified_schema_name}.{table_name}").collect()
-    schema_dict: Dict[str, str] = {}
+    schema_dict: dict[str, str] = {}
     for row in result:
         schema_dict[row["name"]] = row["type"]
     return schema_dict
@@ -112,13 +112,13 @@ def get_table_schema_types(
     database: str,
     schema: str,
     table_name: str,
-) -> Dict[str, types.DataType]:
+) -> dict[str, types.DataType]:
     fully_qualified_table_name = identifier.get_schema_level_object_identifier(
         db=database, schema=schema, object_name=table_name
     )
-    struct_fields: List[types.StructField] = session.table(fully_qualified_table_name).schema.fields
+    struct_fields: list[types.StructField] = session.table(fully_qualified_table_name).schema.fields
-    schema_dict: Dict[str, types.DataType] = {}
+    schema_dict: dict[str, types.DataType] = {}
     for field in struct_fields:
         schema_dict[field.name] = field.datatype
     return schema_dict

snowflake/ml/data/_internal/arrow_ingestor.py CHANGED Viewed

@@ -2,7 +2,7 @@ import collections
 import logging
 import os
 import time
-from typing import Any, Deque, Dict, Iterator, List, Optional, Sequence, Union
+from typing import Any, Deque, Iterator, Optional, Sequence, Union
 import numpy as np
 import numpy.typing as npt
@@ -71,7 +71,7 @@ class ArrowIngestor(data_ingestor.DataIngestor):
         return cls(session, sources)
     @property
-    def data_sources(self) -> List[data_source.DataSource]:
+    def data_sources(self) -> list[data_source.DataSource]:
         return self._data_sources
     def to_batches(
@@ -79,7 +79,7 @@ class ArrowIngestor(data_ingestor.DataIngestor):
         batch_size: int,
         shuffle: bool = True,
         drop_last_batch: bool = True,
-    ) -> Iterator[Dict[str, npt.NDArray[Any]]]:
+    ) -> Iterator[dict[str, npt.NDArray[Any]]]:
         """Iterate through PyArrow Dataset to generate batches whose length equals to expected batch size.
         As we are generating batches with the exactly same length, the last few rows in each file might get left as they
@@ -120,7 +120,7 @@ class ArrowIngestor(data_ingestor.DataIngestor):
     def _get_dataset(self, shuffle: bool) -> pds.Dataset:
         format = self._format
-        sources: List[Any] = []
+        sources: list[Any] = []
         source_format = None
         for source in self._data_sources:
             if isinstance(source, str):
@@ -155,7 +155,7 @@ class ArrowIngestor(data_ingestor.DataIngestor):
         pa_dataset: pds.Dataset = pds.dataset(sources, format=format, **self._kwargs)
         return pa_dataset
-    def _get_batches_from_buffer(self, batch_size: int) -> Dict[str, npt.NDArray[Any]]:
+    def _get_batches_from_buffer(self, batch_size: int) -> dict[str, npt.NDArray[Any]]:
         """Generate new batches from the existing record batch buffer."""
         cnt_rbs_num_rows = 0
         candidates = []
@@ -180,7 +180,7 @@ class ArrowIngestor(data_ingestor.DataIngestor):
         return _record_batch_to_arrays(res)
-def _merge_record_batches(record_batches: List[pa.RecordBatch]) -> pa.RecordBatch:
+def _merge_record_batches(record_batches: list[pa.RecordBatch]) -> pa.RecordBatch:
     """Merge a list of arrow RecordBatches into one. Similar to MergeTables."""
     if not record_batches:
         return _EMPTY_RECORD_BATCH
@@ -192,7 +192,7 @@ def _merge_record_batches(record_batches: List[pa.RecordBatch]) -> pa.RecordBatc
     return batches[0]
-def _record_batch_to_arrays(rb: pa.RecordBatch) -> Dict[str, npt.NDArray[Any]]:
+def _record_batch_to_arrays(rb: pa.RecordBatch) -> dict[str, npt.NDArray[Any]]:
     """Transform the record batch to a (string, numpy array) dict."""
     batch_dict = {}
     for column, column_schema in zip(rb, rb.schema):

snowflake/ml/data/data_connector.py CHANGED Viewed

@@ -1,28 +1,13 @@
 import os
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Dict,
-    Generator,
-    List,
-    Optional,
-    Sequence,
-    Type,
-    TypeVar,
-    cast,
-)
+from typing import TYPE_CHECKING, Any, Generator, Optional, Sequence, TypeVar
 import numpy.typing as npt
 from typing_extensions import deprecated
 from snowflake import snowpark
-from snowflake.ml._internal import telemetry
+from snowflake.ml._internal import env, telemetry
 from snowflake.ml.data import data_ingestor, data_source
 from snowflake.ml.data._internal.arrow_ingestor import ArrowIngestor
-from snowflake.ml.modeling._internal.constants import (
-    IN_ML_RUNTIME_ENV_VAR,
-    USE_OPTIMIZED_DATA_INGESTOR,
-)
 from snowflake.snowpark import context as sf_context
 if TYPE_CHECKING:
@@ -43,7 +28,7 @@ DataConnectorType = TypeVar("DataConnectorType", bound="DataConnector")
 class DataConnector:
     """Snowflake data reader which provides application integration connectors"""
-    DEFAULT_INGESTOR_CLASS: Type[data_ingestor.DataIngestor] = ArrowIngestor
+    DEFAULT_INGESTOR_CLASS: type[data_ingestor.DataIngestor] = ArrowIngestor
     def __init__(
         self,
@@ -54,27 +39,22 @@ class DataConnector:
         self._kwargs = kwargs
     @classmethod
-    @snowpark._internal.utils.private_preview(version="1.6.0")
     def from_dataframe(
-        cls: Type[DataConnectorType],
+        cls: type[DataConnectorType],
         df: snowpark.DataFrame,
-        ingestor_class: Optional[Type[data_ingestor.DataIngestor]] = None,
+        ingestor_class: Optional[type[data_ingestor.DataIngestor]] = None,
         **kwargs: Any,
     ) -> DataConnectorType:
         if len(df.queries["queries"]) != 1 or len(df.queries["post_actions"]) != 0:
             raise ValueError("DataFrames with multiple queries and/or post-actions not supported")
-        return cast(
-            DataConnectorType,
-            cls.from_sql(df.queries["queries"][0], session=df._session, ingestor_class=ingestor_class, **kwargs),
-        )
+        return cls.from_sql(df.queries["queries"][0], session=df._session, ingestor_class=ingestor_class, **kwargs)
     @classmethod
-    @snowpark._internal.utils.private_preview(version="1.7.3")
     def from_sql(
-        cls: Type[DataConnectorType],
+        cls: type[DataConnectorType],
         query: str,
         session: Optional[snowpark.Session] = None,
-        ingestor_class: Optional[Type[data_ingestor.DataIngestor]] = None,
+        ingestor_class: Optional[type[data_ingestor.DataIngestor]] = None,
         **kwargs: Any,
     ) -> DataConnectorType:
         session = session or sf_context.get_active_session()
@@ -83,9 +63,9 @@ class DataConnector:
     @classmethod
     def from_dataset(
-        cls: Type[DataConnectorType],
+        cls: type[DataConnectorType],
         ds: "dataset.Dataset",
-        ingestor_class: Optional[Type[data_ingestor.DataIngestor]] = None,
+        ingestor_class: Optional[type[data_ingestor.DataIngestor]] = None,
         **kwargs: Any,
     ) -> DataConnectorType:
         dsv = ds.selected_version
@@ -102,10 +82,10 @@ class DataConnector:
         func_params_to_log=["sources", "ingestor_class"],
     )
     def from_sources(
-        cls: Type[DataConnectorType],
+        cls: type[DataConnectorType],
         session: snowpark.Session,
         sources: Sequence[data_source.DataSource],
-        ingestor_class: Optional[Type[data_ingestor.DataIngestor]] = None,
+        ingestor_class: Optional[type[data_ingestor.DataIngestor]] = None,
         **kwargs: Any,
     ) -> DataConnectorType:
         ingestor_class = ingestor_class or cls.DEFAULT_INGESTOR_CLASS
@@ -113,7 +93,7 @@ class DataConnector:
         return cls(ingestor, **kwargs)
     @property
-    def data_sources(self) -> List[data_source.DataSource]:
+    def data_sources(self) -> list[data_source.DataSource]:
         return self._ingestor.data_sources
     @telemetry.send_api_usage_telemetry(
@@ -139,7 +119,7 @@ class DataConnector:
         """
         import tensorflow as tf
-        def generator() -> Generator[Dict[str, npt.NDArray[Any]], None, None]:
+        def generator() -> Generator[dict[str, npt.NDArray[Any]], None, None]:
             yield from self._ingestor.to_batches(batch_size, shuffle, drop_last_batch)
         # Derive TensorFlow signature
@@ -269,11 +249,10 @@ class DataConnector:
 # Switch to use Runtime's Data Ingester if running in ML runtime
 # Fail silently if the data ingester is not found
-if os.getenv(IN_ML_RUNTIME_ENV_VAR) and os.getenv(USE_OPTIMIZED_DATA_INGESTOR):
+if env.IN_ML_RUNTIME and os.getenv(env.USE_OPTIMIZED_DATA_INGESTOR):
     try:
         from runtime_external_entities import get_ingester_class
         DataConnector.DEFAULT_INGESTOR_CLASS = get_ingester_class()
     except ImportError:
         """Runtime Default Ingester not found, ignore"""
-        pass

snowflake/ml/data/data_ingestor.py CHANGED Viewed

@@ -1,15 +1,4 @@
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Dict,
-    Iterator,
-    List,
-    Optional,
-    Protocol,
-    Sequence,
-    Type,
-    TypeVar,
-)
+from typing import TYPE_CHECKING, Any, Iterator, Optional, Protocol, Sequence, TypeVar
 from numpy import typing as npt
@@ -26,12 +15,12 @@ DataIngestorType = TypeVar("DataIngestorType", bound="DataIngestor")
 class DataIngestor(Protocol):
     @classmethod
     def from_sources(
-        cls: Type[DataIngestorType], session: snowpark.Session, sources: Sequence[data_source.DataSource]
+        cls: type[DataIngestorType], session: snowpark.Session, sources: Sequence[data_source.DataSource]
     ) -> DataIngestorType:
         raise NotImplementedError
     @property
-    def data_sources(self) -> List[data_source.DataSource]:
+    def data_sources(self) -> list[data_source.DataSource]:
         raise NotImplementedError
     def to_batches(
@@ -39,7 +28,7 @@ class DataIngestor(Protocol):
         batch_size: int,
         shuffle: bool = True,
         drop_last_batch: bool = True,
-    ) -> Iterator[Dict[str, npt.NDArray[Any]]]:
+    ) -> Iterator[dict[str, npt.NDArray[Any]]]:
         raise NotImplementedError
     def to_pandas(self, limit: Optional[int] = None) -> "pd.DataFrame":

snowflake/ml/data/data_source.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import dataclasses
-from typing import List, Optional, Union
+from typing import Optional, Union
 @dataclasses.dataclass(frozen=True)
@@ -17,7 +17,7 @@ class DatasetInfo:
     fully_qualified_name: str
     version: str
     url: Optional[str] = None
-    exclude_cols: Optional[List[str]] = None
+    exclude_cols: Optional[list[str]] = None
 DataSource = Union[DataFrameInfo, DatasetInfo, str]

snowflake/ml/data/ingestor_utils.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Optional
 import fsspec
 import pyarrow as pa
@@ -33,7 +33,7 @@ def _get_dataframe_cursor(session: snowpark.Session, df_info: data_source.DataFr
 def get_dataframe_result_batches(
     session: snowpark.Session, df_info: data_source.DataFrameInfo
-) -> List[result_batch.ResultBatch]:
+) -> list[result_batch.ResultBatch]:
     """Retrieve the ResultBatches for a given query"""
     cursor = _get_dataframe_cursor(session, df_info)
     batches = cursor.get_result_batches()
@@ -63,7 +63,7 @@ def get_dataset_filesystem(
 def get_dataset_files(
     session: snowpark.Session, ds_info: data_source.DatasetInfo, filesystem: Optional[fsspec.AbstractFileSystem] = None
-) -> List[str]:
+) -> list[str]:
     """Get the list of files in a given Dataset"""
     if filesystem is None:
         filesystem = get_dataset_filesystem(session, ds_info)

snowflake-ml-python 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl

snowflake-ml-python 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl