PyPI - snowflake-ml-python - Versions diffs - 1.5.0__py3-none-any.whl → 1.5.2__py3-none-any.whl - Mend

snowflake-ml-python 1.5.0__py3-none-any.whl → 1.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (197) hide show

snowflake/cortex/_sentiment.py CHANGED Viewed

@@ -11,7 +11,7 @@ from snowflake.ml._internal import telemetry
 )
 def Sentiment(
     text: Union[str, snowpark.Column], session: Optional[snowpark.Session] = None
-) -> Union[str, snowpark.Column]:
+) -> Union[float, snowpark.Column]:
     """Sentiment calls into the LLM inference service to perform sentiment analysis on the input text.
     Args:
@@ -21,11 +21,14 @@ def Sentiment(
     Returns:
         A column of floats. 1 represents positive sentiment, -1 represents negative sentiment.
     """
     return _sentiment_impl("snowflake.cortex.sentiment", text, session=session)
 def _sentiment_impl(
     function: str, text: Union[str, snowpark.Column], session: Optional[snowpark.Session] = None
-) -> Union[str, snowpark.Column]:
-    return call_sql_function(function, session, text)
+) -> Union[float, snowpark.Column]:
+    output = call_sql_function(function, session, text)
+    if isinstance(output, snowpark.Column):
+        return output
+    return float(output)

snowflake/ml/_internal/env_utils.py CHANGED Viewed

@@ -553,6 +553,9 @@ def load_conda_env_file(
         A tuple of Dict of conda dependencies after validated, optional pip requirements if exist
         and a string 'major.minor.patchlevel' of python version.
     """
+    if not path.exists():
+        return collections.defaultdict(list), None, None
     with open(path, encoding="utf-8") as f:
         env = yaml.safe_load(stream=f)
@@ -603,6 +606,9 @@ def load_requirements_file(path: pathlib.Path) -> List[requirements.Requirement]
     Returns:
         List of dependencies string after validated.
     """
+    if not path.exists():
+        return []
     with open(path, encoding="utf-8") as f:
         reqs = f.readlines()

snowflake/ml/_internal/lineage/lineage_utils.py ADDED Viewed

@@ -0,0 +1,95 @@
+import copy
+import functools
+from typing import Any, Callable, List
+from snowflake import snowpark
+from snowflake.ml._internal.lineage import data_source
+DATA_SOURCES_ATTR = "_data_sources"
+def _get_datasources(*args: Any) -> List[data_source.DataSource]:
+    """Collect the data sources attached to any of the given arguments.
+    An argument contributes only when its `_data_sources` attribute is a list made up
+    entirely of DataSource instances; every other argument is silently ignored.
+    """
+    collected = []
+    for candidate in args:
+        attached = getattr(candidate, DATA_SOURCES_ATTR, None)
+        if not isinstance(attached, list):
+            continue
+        if any(not isinstance(item, data_source.DataSource) for item in attached):
+            continue
+        collected.extend(attached)
+    return collected
+def _wrap_func(
+    fn: Callable[..., snowpark.DataFrame], data_sources: List[data_source.DataSource]
+) -> Callable[..., snowpark.DataFrame]:
+    """Return a wrapper around `fn` whose result is tagged with `data_sources`.
+    Args:
+        fn: DataFrame transform function to wrap.
+        data_sources: Data sources to attach to every DataFrame returned by `fn`.
+    Returns:
+        A function with the same call signature as `fn` that patches its result in place.
+    """
+    @functools.wraps(fn)
+    def wrapped(*args: Any, **kwargs: Any) -> snowpark.DataFrame:
+        derived = fn(*args, **kwargs)
+        # With inplace=True, patch_dataframe returns the very object it was given.
+        return patch_dataframe(derived, data_sources=data_sources, inplace=True)
+    return wrapped
+def patch_dataframe(
+    df: snowpark.DataFrame, data_sources: List[data_source.DataSource], inplace: bool = False
+) -> snowpark.DataFrame:
+    """
+    Monkey patch a DataFrame to attach the provided data_sources as an attribute of the DataFrame.
+    The DataFrame's transformation functions are patched as well, so that DataFrames derived
+    from it carry the same data sources attribute.
+    Args:
+        df: DataFrame to be patched
+        data_sources: List of data sources for the DataFrame
+        inplace: If True, patches the DataFrame in-place. If False, patches a shallow copy of the DataFrame.
+    Returns:
+        Patched DataFrame
+    """
+    target = df if inplace else copy.copy(df)
+    setattr(target, DATA_SOURCES_ATTR, data_sources)
+    # Instance-level monkey-patches
+    propagating_methods = (
+        "_with_plan",
+        "_lateral",
+        "group_by",
+        "group_by_grouping_sets",
+        "cube",
+        "pivot",
+        "rollup",
+        "cache_result",
+        "_to_df",  # RelationalGroupedDataFrame
+    )
+    for method_name in propagating_methods:
+        bound = getattr(target, method_name, None)
+        if bound is None:
+            # The object does not provide this method; nothing to wrap.
+            continue
+        setattr(target, method_name, _wrap_func(bound, data_sources=data_sources))
+    return target
+def _wrap_class_func(fn: Callable[..., snowpark.DataFrame]) -> Callable[..., snowpark.DataFrame]:
+    """Wrap `fn` so its result inherits the data sources found on the call's arguments.
+    Data sources are gathered from the positional arguments first, then from the keyword
+    argument values. The result is left untouched when none are found.
+    """
+    @functools.wraps(fn)
+    def wrapped(*args: Any, **kwargs: Any) -> snowpark.DataFrame:
+        result = fn(*args, **kwargs)
+        inherited = _get_datasources(*args, *kwargs.values())
+        if not inherited:
+            return result
+        # With inplace=True, patch_dataframe returns the very object it was given.
+        return patch_dataframe(result, inherited, inplace=True)
+    return wrapped
+# Class-level monkey-patches
+# Runs at import time: every method named below is replaced on the class itself with a
+# _wrap_class_func wrapper, so its result inherits the data sources of the call's arguments.
+for klass, func_list in {
+    snowpark.DataFrame: [
+        "__copy__",
+    ],
+    # No methods are currently patched on RelationalGroupedDataFrame.
+    snowpark.RelationalGroupedDataFrame: [],
+}.items():
+    assert isinstance(func_list, list)  # mypy
+    for func in func_list:
+        fn = getattr(klass, func)
+        setattr(klass, func, _wrap_class_func(fn))

snowflake/ml/_internal/telemetry.py CHANGED Viewed

@@ -50,6 +50,7 @@ class TelemetryField(enum.Enum):
     # types of telemetry
     TYPE_FUNCTION_USAGE = "function_usage"
     TYPE_SNOWML_SPCS_USAGE = "snowml_spcs_usage"
+    TYPE_SNOWML_PIPELINE_USAGE = "snowml_pipeline_usage"
     # message keys for telemetry
     KEY_PROJECT = "project"
     KEY_SUBPROJECT = "subproject"

snowflake/ml/_internal/utils/identifier.py CHANGED Viewed

@@ -156,7 +156,7 @@ def parse_schema_level_object_identifier(
     """
     res = _SF_SCHEMA_LEVEL_OBJECT_RE.fullmatch(path)
     if not res:
-        raise ValueError(f"Invalid identifier. It should start with database.schema.stage. Getting {path}")
+        raise ValueError(f"Invalid identifier. It should start with database.schema.object. Getting {path}")
     return (
         res.group("db"),
         res.group("schema"),

snowflake/ml/_internal/utils/sql_identifier.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List
+from typing import List, Optional, Tuple
 from snowflake.ml._internal.utils import identifier
@@ -79,3 +79,16 @@ class SqlIdentifier(str):
 def to_sql_identifiers(list_of_str: List[str], *, case_sensitive: bool = False) -> List[SqlIdentifier]:
     return [SqlIdentifier(val, case_sensitive=case_sensitive) for val in list_of_str]
+def parse_fully_qualified_name(
+    name: str,
+) -> Tuple[Optional[SqlIdentifier], Optional[SqlIdentifier], SqlIdentifier]:
+    """Split a schema-level object name into database, schema and object identifiers.
+    Args:
+        name: Name of the form [[database.]schema.]object.
+    Returns:
+        A tuple of (database, schema, object). Database and schema are None when the
+        parser yields no value for them.
+    Raises:
+        ValueError: The parser produced no object name for the input.
+    """
+    # `object_name` avoids shadowing the `object` builtin.
+    db, schema, object_name, _ = identifier.parse_schema_level_object_identifier(name)
+    # Validate the parsed result rather than the input, which is necessarily non-None here.
+    if object_name is None:
+        raise ValueError(f"Unable to parse the input name `{name}` as fully qualified.")
+    return (
+        SqlIdentifier(db) if db else None,
+        SqlIdentifier(schema) if schema else None,
+        SqlIdentifier(object_name),
+    )

snowflake/ml/_internal/utils/temp_file_utils.py CHANGED Viewed

@@ -8,14 +8,17 @@ from absl.logging import logging
 logger = logging.getLogger(__name__)
-def get_temp_file_path() -> str:
+def get_temp_file_path(prefix: str = "") -> str:
     """Returns a new random temp file path.
+    Args:
+        prefix: A prefix for the temp file name, which can help identify what the file stores. Defaults to an empty string.
     Returns:
         A new temp file path.
     """
     # TODO(snandamuri): Use in-memory filesystem for temp files.
-    local_file = tempfile.NamedTemporaryFile(delete=True)
+    local_file = tempfile.NamedTemporaryFile(prefix=prefix, delete=True)
     local_file_name = local_file.name
     local_file.close()
     return local_file_name

snowflake/ml/dataset/__init__.py CHANGED Viewed

@@ -1,9 +1,10 @@
-from .dataset import Dataset
+from .dataset import Dataset, DatasetVersion
 from .dataset_factory import create_from_dataframe, load_dataset
 from .dataset_reader import DatasetReader
 __all__ = [
     "Dataset",
+    "DatasetVersion",
     "DatasetReader",
     "create_from_dataframe",
     "load_dataset",

snowflake/ml/dataset/dataset.py CHANGED Viewed

@@ -73,10 +73,11 @@ class DatasetVersion:
                     f"SHOW VERSIONS LIKE '{self._version}' IN DATASET {self._parent.fully_qualified_name}",
                     statement_params=_TELEMETRY_STATEMENT_PARAMS,
                 )
-                .has_dimensions(expected_rows=1)
+                .has_column(_DATASET_VERSION_NAME_COL, allow_empty=False)
                 .validate()
             )
-            self._properties = sql_result[0].as_dict(True)
+            (match_row,) = (r for r in sql_result if r[_DATASET_VERSION_NAME_COL] == self._version)
+            self._properties = match_row.as_dict(True)
         return self._properties.get(property_name, default)
     def _get_metadata(self) -> Optional[dataset_metadata.DatasetMetadata]:
@@ -283,7 +284,7 @@ class Dataset:
             exclude_cols: Name of column(s) in dataset to be excluded during training/testing (e.g. timestamp).
             label_cols: Name of column(s) in dataset that contains labels.
             properties: Custom metadata properties, saved under `DatasetMetadata.properties`
-            partition_by: Optional partitioning scheme within the new Dataset version.
+            partition_by: Optional SQL expression to use as the partitioning scheme within the new Dataset version.
             comment: A descriptive comment about this dataset.
         Returns:

snowflake/ml/dataset/dataset_reader.py CHANGED Viewed

@@ -1,10 +1,11 @@
 from typing import Any, List
 import pandas as pd
+from pyarrow import parquet as pq
 from snowflake import snowpark
 from snowflake.ml._internal import telemetry
-from snowflake.ml._internal.lineage import data_source, dataset_dataframe
+from snowflake.ml._internal.lineage import data_source, lineage_utils
 from snowflake.ml._internal.utils import import_utils
 from snowflake.ml.fileset import snowfs
@@ -185,7 +186,7 @@ class DatasetReader:
         combined_df = dfs[0]
         for df in dfs[1:]:
             combined_df = combined_df.union_all_by_name(df)
-        return dataset_dataframe.DatasetDataFrame.from_dataframe(combined_df, data_sources=self._sources, inplace=True)
+        return lineage_utils.patch_dataframe(combined_df, data_sources=self._sources, inplace=True)
     @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
     def to_pandas(self) -> pd.DataFrame:
@@ -194,9 +195,5 @@ class DatasetReader:
         if not files:
             return pd.DataFrame()  # Return empty DataFrame
         self._fs.optimize_read(files)
-        pd_dfs = []
-        for file in files:
-            with self._fs.open(file) as fp:
-                pd_dfs.append(pd.read_parquet(fp))
-        pd_df = pd_dfs[0] if len(pd_dfs) == 1 else pd.concat(pd_dfs, ignore_index=True, copy=False)
-        return pd_df
+        pd_ds = pq.ParquetDataset(files, filesystem=self._fs)
+        return pd_ds.read_pandas().to_pandas()

snowflake/ml/feature_store/__init__.py CHANGED Viewed

@@ -2,8 +2,14 @@ import os
 from snowflake.ml._internal import init_utils
+from .access_manager import setup_feature_store
 pkg_dir = os.path.dirname(os.path.abspath(__file__))
 pkg_name = __name__
 exportable_classes = init_utils.fetch_classes_from_modules_in_pkg_dir(pkg_dir=pkg_dir, pkg_name=pkg_name)
 for k, v in exportable_classes.items():
     globals()[k] = v
+__all__ = list(exportable_classes.keys()) + [
+    "setup_feature_store",
+]

snowflake/ml/feature_store/access_manager.py ADDED Viewed

@@ -0,0 +1,283 @@
+from dataclasses import asdict, dataclass
+from enum import Enum
+from typing import Dict, List, Optional
+from warnings import warn
+from snowflake.ml._internal import telemetry
+from snowflake.ml._internal.utils.query_result_checker import SqlResultValidator
+from snowflake.ml._internal.utils.sql_identifier import SqlIdentifier
+from snowflake.ml.feature_store.feature_store import (
+    _FEATURE_STORE_OBJECT_TAG,
+    _FEATURE_VIEW_METADATA_TAG,
+    CreationMode,
+    FeatureStore,
+)
+from snowflake.snowpark import Session, exceptions
+_PROJECT = "FeatureStore"
+_ALL_OBJECTS = "@ALL_OBJECTS"  # Special flag to mark "all+future" grants
+class _FeatureStoreRole(Enum):
+    """Kinds of Feature Store role; used as the key of the privilege tables in this module."""
+    NONE = 0  # For testing purposes
+    CONSUMER = 1
+    PRODUCER = 2
+@dataclass(frozen=True)
+class _Privilege:
+    """One privilege grant specification consumed by `_grant_privileges`."""
+    # Object type keyword placed after ON in the GRANT statement (e.g. "SCHEMA", "TAG").
+    object_type: str
+    # Object name template formatted with the session info fields, or the _ALL_OBJECTS flag.
+    object_name: str
+    # Privilege names; they are comma-joined into a single GRANT statement.
+    privileges: List[str]
+    # Optional template appended to the statement as " IN <scope>"; formatted like object_name.
+    scope: Optional[str] = None
+@dataclass(frozen=True)
+class _SessionInfo:
+    """Database, schema and warehouse identifiers for a Feature Store instance.
+    The field names double as the placeholder names in the privilege templates.
+    """
+    database: SqlIdentifier
+    schema: SqlIdentifier
+    warehouse: SqlIdentifier
+# Privileges granted to each role type before the FeatureStore object is constructed.
+# Each entry is a _Privilege; "{database}", "{schema}" and "{warehouse}" placeholders are
+# filled in from the session info when the grants are issued.
+_PRE_INIT_PRIVILEGES: Dict[_FeatureStoreRole, List[_Privilege]] = {
+    _FeatureStoreRole.PRODUCER: [
+        _Privilege("DATABASE", "{database}", ["USAGE"]),
+        _Privilege("SCHEMA", "{database}.{schema}", ["USAGE"]),
+        _Privilege(
+            "SCHEMA",
+            "{database}.{schema}",
+            [
+                "CREATE DYNAMIC TABLE",
+                "CREATE TAG",
+                "CREATE VIEW",
+                "CREATE TASK",
+                "CREATE TABLE",
+            ],
+        ),
+        _Privilege(
+            "SCHEMA",
+            "{database}.{schema}",
+            [
+                "CREATE DATASET",  # Handle DATASET privilege separately since it may not be enabled
+            ],
+        ),
+        # _ALL_OBJECTS entries are expanded into both FUTURE and ALL grants within the given scope.
+        _Privilege("DYNAMIC TABLE", _ALL_OBJECTS, ["OPERATE"], "SCHEMA {database}.{schema}"),
+        _Privilege("TASK", _ALL_OBJECTS, ["OPERATE"], "SCHEMA {database}.{schema}"),
+    ],
+    _FeatureStoreRole.CONSUMER: [
+        _Privilege("DATABASE", "{database}", ["USAGE"]),
+        _Privilege("SCHEMA", "{database}.{schema}", ["USAGE"]),
+        _Privilege("DYNAMIC TABLE", _ALL_OBJECTS, ["SELECT", "MONITOR"], "SCHEMA {database}.{schema}"),
+        _Privilege("VIEW", _ALL_OBJECTS, ["SELECT", "REFERENCES"], "SCHEMA {database}.{schema}"),
+        _Privilege("TABLE", _ALL_OBJECTS, ["SELECT", "REFERENCES"], "SCHEMA {database}.{schema}"),
+        _Privilege("DATASET", _ALL_OBJECTS, ["USAGE"], "SCHEMA {database}.{schema}"),
+        # User should decide whether they want to grant warehouse usage to CONSUMER
+        # _Privilege("WAREHOUSE", "{warehouse}", ["USAGE"]),
+    ],
+    _FeatureStoreRole.NONE: [],
+}
+# Privileges granted to each role type after the FeatureStore object has been constructed.
+# Doubled braces keep "{database}" and "{schema}" as placeholders for the later format() call,
+# while the tag names are interpolated immediately.
+_POST_INIT_PRIVILEGES: Dict[_FeatureStoreRole, List[_Privilege]] = {
+    _FeatureStoreRole.PRODUCER: [
+        _Privilege("TAG", f"{{database}}.{{schema}}.{_FEATURE_VIEW_METADATA_TAG}", ["APPLY"]),
+        _Privilege("TAG", f"{{database}}.{{schema}}.{_FEATURE_STORE_OBJECT_TAG}", ["APPLY"]),
+    ],
+    _FeatureStoreRole.CONSUMER: [],
+    _FeatureStoreRole.NONE: [],
+}
+def _grant_privileges(
+    session: Session, role_name: str, privileges: List[_Privilege], session_info: _SessionInfo
+) -> None:
+    """Issue the GRANT statements described by `privileges` to `role_name`.
+    A privilege whose object name is the _ALL_OBJECTS flag is expanded into a FUTURE grant
+    followed by an ALL grant on the pluralized object type. SQL errors whose message contains
+    one of the tolerated markers (including the object type itself) are downgraded to a
+    UserWarning and the remaining statements of that privilege are skipped.
+    Args:
+        session: Snowpark Session used to run the statements.
+        role_name: Role receiving the grants.
+        privileges: Privilege specifications to apply, in order.
+        session_info: Values substituted into the object name and scope templates.
+    Raises:
+        exceptions.SnowparkSQLException: A grant failed with a message that is not tolerated.
+    """
+    template_values = asdict(session_info)
+    for privilege in privileges:
+        if privilege.object_name != _ALL_OBJECTS:
+            targets = [f"{privilege.object_type} {privilege.object_name.format(**template_values)}"]
+        else:
+            # Ensure obj is plural
+            plural_type = privilege.object_type.upper()
+            if not plural_type.endswith("S"):
+                plural_type += "S"
+            targets = [f"FUTURE {plural_type}", f"ALL {plural_type}"]
+        try:
+            for target in targets:
+                statement = f"GRANT {','.join(privilege.privileges)} ON {target}"
+                if privilege.scope:
+                    statement += f" IN {privilege.scope.format(**template_values)}"
+                statement += f" TO ROLE {role_name}"
+                session.sql(statement).collect()
+        except exceptions.SnowparkSQLException as e:
+            tolerated_markers = (
+                "Ask your account admin",
+                "Object type or Class",
+                privilege.object_type,
+            )
+            if not any(marker in e.message for marker in tolerated_markers):
+                raise
+            warn(
+                f"Failed to grant privilege for {privilege.object_type}: {e.message}",
+                UserWarning,
+                stacklevel=1,
+            )
+def _configure_pre_init_privileges(
+    session: Session,
+    session_info: _SessionInfo,
+    roles_to_create: Dict[_FeatureStoreRole, str],
+) -> None:
+    """
+    Create the Feature Store schema if needed and grant the pre-initialization privileges.
+    Must be run with ACCOUNTADMIN or a role with `MANAGE GRANTS` privilege.
+    See https://docs.snowflake.com/en/sql-reference/sql/grant-privilege for more information
+    about privilege grants in Snowflake.
+    Args:
+        session: Snowpark Session to interact with Snowflake backend.
+        session_info: Session info like database and schema for the FeatureStore instance.
+        roles_to_create: Producer and optional consumer roles to create.
+    """
+    qualified_schema = f"{session_info.database}.{session_info.schema}"
+    # Create schema if not already exists
+    validated_rows = (
+        SqlResultValidator(session, f"CREATE SCHEMA IF NOT EXISTS {qualified_schema}")
+        .has_dimensions(expected_rows=1)
+        .has_column("status")
+        .validate()
+    )
+    (create_row,) = validated_rows
+    # Pass schema ownership from admin to PRODUCER, but only for a schema created just now
+    if create_row["status"].endswith("successfully created."):
+        # TODO: we are missing a test case for this code path
+        producer = roles_to_create[_FeatureStoreRole.PRODUCER]
+        session.sql(f"GRANT OWNERSHIP ON SCHEMA {qualified_schema} TO ROLE {producer}").collect()
+    # Grant privileges to roles
+    for role_type in roles_to_create:
+        _grant_privileges(session, roles_to_create[role_type], _PRE_INIT_PRIVILEGES[role_type], session_info)
+def _configure_post_init_privileges(
+    session: Session,
+    session_info: _SessionInfo,
+    roles_to_create: Dict[_FeatureStoreRole, str],
+) -> None:
+    """Grant each role the privileges listed for its role type in _POST_INIT_PRIVILEGES.
+    Args:
+        session: Snowpark Session to interact with Snowflake backend.
+        session_info: Session info like database and schema for the FeatureStore instance.
+        roles_to_create: Mapping of role type to role name.
+    """
+    for role_type in roles_to_create:
+        role_name = roles_to_create[role_type]
+        _grant_privileges(session, role_name, _POST_INIT_PRIVILEGES[role_type], session_info)
+def _configure_role_hierarchy(
+    session: Session,
+    producer_role: str,
+    consumer_role: Optional[str],
+) -> None:
+    """
+    Create Feature Store roles and configure the role hierarchy. Must be run with
+    ACCOUNTADMIN or a role with `CREATE ROLE` privilege.
+    See https://docs.snowflake.com/en/sql-reference/sql/grant-privilege for more information
+    about privilege grants in Snowflake.
+    Args:
+        session: Snowpark Session to interact with Snowflake backend.
+        producer_role: Name of producer role to be configured.
+        consumer_role: Name of consumer role to be configured. Skipped when None.
+    """
+    def _execute(statement: str) -> None:
+        session.sql(statement).collect()
+    # Create the necessary roles and build role hierarchy
+    producer_role = SqlIdentifier(producer_role)
+    _execute(f"CREATE ROLE IF NOT EXISTS {producer_role}")
+    _execute(f"GRANT ROLE {producer_role} TO ROLE SYSADMIN")
+    _execute(f"GRANT ROLE {producer_role} TO ROLE {session.get_current_role()}")
+    if consumer_role is None:
+        return
+    # The consumer role is granted to the producer role.
+    consumer_role = SqlIdentifier(consumer_role)
+    _execute(f"CREATE ROLE IF NOT EXISTS {consumer_role}")
+    _execute(f"GRANT ROLE {consumer_role} TO ROLE {producer_role}")
+@telemetry.send_api_usage_telemetry(project=_PROJECT)
+def setup_feature_store(
+    session: Session,
+    database: str,
+    schema: str,
+    warehouse: str,
+    producer_role: str = "FS_PRODUCER",
+    consumer_role: Optional[str] = None,
+) -> FeatureStore:
+    """
+    Sets up a new Feature Store including role/privilege setup. Must be run with ACCOUNTADMIN
+    or a role with `MANAGE GRANTS` and `CREATE ROLE` privileges.
+    See https://docs.snowflake.com/en/sql-reference/sql/grant-privilege for more information
+    about privilege grants in Snowflake.
+    Args:
+        session: Snowpark Session to interact with Snowflake backend.
+        database: Database to create the FeatureStore instance.
+        schema: Schema to create the FeatureStore instance.
+        warehouse: Default warehouse for Feature Store compute.
+        producer_role: Name of producer role to be configured.
+        consumer_role: Name of consumer role to be configured. If not specified, consumer role won't be created.
+    Returns:
+        Feature Store instance.
+    Raises:
+        exceptions.SnowparkSQLException: Insufficient privileges.
+    """
+    # Normalize each identifier exactly once, under its own name, and reuse it everywhere below.
+    database_id = SqlIdentifier(database)
+    schema_id = SqlIdentifier(schema)
+    warehouse_id = SqlIdentifier(warehouse)
+    session_info = _SessionInfo(database_id, schema_id, warehouse_id)
+    # Built before the try block so the recovery path below never depends on partial state.
+    roles_to_create = {_FeatureStoreRole.PRODUCER: producer_role}
+    if consumer_role is not None:
+        roles_to_create[_FeatureStoreRole.CONSUMER] = consumer_role
+    try:
+        _configure_role_hierarchy(session, producer_role=producer_role, consumer_role=consumer_role)
+    except exceptions.SnowparkSQLException:
+        # Error can be safely ignored if roles already exist and hierarchy is already built
+        for role in roles_to_create.values():
+            # Ensure roles already exist
+            if session.sql(f"SHOW ROLES LIKE '{role}' STARTS WITH '{role}'").count() == 0:
+                raise
+        if consumer_role is not None:
+            # Ensure hierarchy already configured
+            consumer_grants = session.sql(f"SHOW GRANTS ON ROLE {consumer_role}").collect()
+            # NOTE(review): compares against the role name exactly as passed in; a name given in a
+            # different case than Snowflake reports would presumably not match - confirm.
+            if not any(r["granted_to"] == "ROLE" and r["grantee_name"] == producer_role for r in consumer_grants):
+                raise
+    # Do any pre-FeatureStore.__init__() privilege setup
+    _configure_pre_init_privileges(session, session_info, roles_to_create)
+    # Use PRODUCER role to create and operate new Feature Store
+    current_role = session.get_current_role()
+    assert current_role is not None  # to make mypy happy
+    try:
+        session.use_role(producer_role)
+        fs = FeatureStore(
+            session, database_id, schema_id, warehouse_id, creation_mode=CreationMode.CREATE_IF_NOT_EXIST
+        )
+    finally:
+        # Always switch back to the caller's role, even if Feature Store creation fails.
+        session.use_role(current_role)
+    # Do any post-FeatureStore.__init__() privilege setup
+    _configure_post_init_privileges(session, session_info, roles_to_create)
+    return fs

snowflake-ml-python 1.5.0__py3-none-any.whl → 1.5.2__py3-none-any.whl

snowflake-ml-python 1.5.0__py3-none-any.whl → 1.5.2__py3-none-any.whl