PyPI - snowflake-ml-python - Versions diffs - 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl - Mend

snowflake-ml-python 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (322) hide show

snowflake/ml/model/_signatures/utils.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import warnings
-from typing import Any, Dict, List, Optional, Sequence
+from typing import Any, Optional, Sequence
 import numpy as np
 import numpy.typing as npt
@@ -12,7 +12,7 @@ from snowflake.ml._internal.exceptions import (
 from snowflake.ml.model._signatures import core
-def convert_list_to_ndarray(data: List[Any]) -> npt.NDArray[Any]:
+def convert_list_to_ndarray(data: list[Any]) -> npt.NDArray[Any]:
     """Create a numpy array from list or nested list. Avoid ragged list and unaligned types.
     Args:
@@ -49,7 +49,7 @@ def convert_list_to_ndarray(data: List[Any]) -> npt.NDArray[Any]:
 def rename_features(
-    features: Sequence[core.BaseFeatureSpec], feature_names: Optional[List[str]] = None
+    features: Sequence[core.BaseFeatureSpec], feature_names: Optional[list[str]] = None
 ) -> Sequence[core.BaseFeatureSpec]:
     """It renames the feature in features provided optional feature names.
@@ -104,7 +104,7 @@ def rename_pandas_df(data: pd.DataFrame, features: Sequence[core.BaseFeatureSpec
     return data
-def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any]) -> Optional[core.ModelSignature]:
+def huggingface_pipeline_signature_auto_infer(task: str, params: dict[str, Any]) -> Optional[core.ModelSignature]:
     # Text
     # https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.ConversationalPipeline
@@ -351,7 +351,7 @@ def series_dropna(series: pd.Series) -> pd.Series:
     return series.dropna(inplace=False).reset_index(drop=True).convert_dtypes()
-def infer_list(name: str, data: List[Any]) -> core.BaseFeatureSpec:
+def infer_list(name: str, data: list[Any]) -> core.BaseFeatureSpec:
     """Infer the feature specification from a list.
     Args:
@@ -382,7 +382,7 @@ def infer_list(name: str, data: List[Any]) -> core.BaseFeatureSpec:
     return core.FeatureSpec(name=name, dtype=arr_dtype, shape=arr.shape)
-def infer_dict(name: str, data: Dict[str, Any]) -> core.FeatureGroupSpec:
+def infer_dict(name: str, data: dict[str, Any]) -> core.FeatureGroupSpec:
     """Infer the feature specification from a dictionary.
     Args:

snowflake/ml/model/custom_model.py CHANGED Viewed

@@ -1,9 +1,10 @@
 import functools
 import inspect
-from typing import Any, Callable, Coroutine, Dict, Generator, List, Optional, Union
+from typing import Any, Callable, Coroutine, Generator, Optional, Union
 import anyio
 import pandas as pd
+from typing_extensions import deprecated
 from snowflake.ml.model import type_hints as model_types
@@ -78,7 +79,7 @@ class ModelRef:
             return MethodRef(self, method_name)
         raise AttributeError(f"Method {method_name} not found in model {self._name}.")
-    def __getstate__(self) -> Dict[str, Any]:
+    def __getstate__(self) -> dict[str, Any]:
         state = self.__dict__.copy()
         del state["_model"]
         return state
@@ -113,8 +114,8 @@ class ModelContext:
     def __init__(
         self,
         *,
-        artifacts: Optional[Union[Dict[str, str], str, model_types.SupportedModelType]] = None,
-        models: Optional[Union[Dict[str, model_types.SupportedModelType], str, model_types.SupportedModelType]] = None,
+        artifacts: Optional[Union[dict[str, str], str, model_types.SupportedModelType]] = None,
+        models: Optional[Union[dict[str, model_types.SupportedModelType], str, model_types.SupportedModelType]] = None,
         **kwargs: Optional[Union[str, model_types.SupportedModelType]],
     ) -> None:
         """Initialize the model context.
@@ -130,8 +131,8 @@ class ModelContext:
             ValueError: Raised when the model name is duplicated.
         """
-        self.artifacts: Dict[str, str] = dict()
-        self.model_refs: Dict[str, ModelRef] = dict()
+        self.artifacts: dict[str, str] = dict()
+        self.model_refs: dict[str, ModelRef] = dict()
         # In case that artifacts is a dictionary, assume the original usage,
         # which is to pass in a dictionary of artifacts.
@@ -185,7 +186,7 @@ class ModelContext:
         return self.model_refs[name]
     def __getitem__(self, key: str) -> Union[str, ModelRef]:
-        combined: Dict[str, Union[str, ModelRef]] = {**self.artifacts, **self.model_refs}
+        combined: dict[str, Union[str, ModelRef]] = {**self.artifacts, **self.model_refs}
         if key not in combined:
             raise KeyError(f"Key {key} not found in the kwargs, current available keys are: {combined.keys()}")
         return combined[key]
@@ -226,12 +227,12 @@ class CustomModel:
                 else:
                     raise TypeError("A non-method inference API function is not supported.")
-    def _get_partitioned_infer_methods(self) -> List[str]:
-        """Returns all methods in CLS with `partitioned_inference_api` as the outermost decorator."""
+    def _get_partitioned_methods(self) -> list[str]:
+        """Returns all methods in CLS with `partitioned_api` as the outermost decorator."""
         rv = []
         for cls_method_str in dir(self):
             cls_method = getattr(self, cls_method_str)
-            if getattr(cls_method, "_is_partitioned_inference_api", False):
+            if getattr(cls_method, "_is_partitioned_api", False):
                 if inspect.ismethod(cls_method):
                     rv.append(cls_method_str)
                 else:
@@ -282,9 +283,21 @@ def inference_api(
     return func
+def partitioned_api(
+    func: Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame],
+) -> Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame]:
+    func.__dict__["_is_inference_api"] = True
+    func.__dict__["_is_partitioned_api"] = True
+    return func
+@deprecated(
+    "snowflake.ml.custom_model.partitioned_inference_api is deprecated and will be removed in a future release."
+    " Use snowflake.ml.custom_model.partitioned_api instead."
+)
 def partitioned_inference_api(
     func: Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame],
 ) -> Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame]:
     func.__dict__["_is_inference_api"] = True
-    func.__dict__["_is_partitioned_inference_api"] = True
+    func.__dict__["_is_partitioned_api"] = True
     return func

snowflake/ml/model/model_signature.py CHANGED Viewed

@@ -1,18 +1,7 @@
 import enum
 import json
 import warnings
-from typing import (
-    Any,
-    Dict,
-    List,
-    Literal,
-    Optional,
-    Sequence,
-    Tuple,
-    Type,
-    Union,
-    cast,
-)
+from typing import Any, Literal, Optional, Sequence, Union, cast
 import numpy as np
 import pandas as pd
@@ -30,7 +19,7 @@ from snowflake.ml._internal.utils import formatting, identifier, sql_identifier
 from snowflake.ml.model import type_hints as model_types
 from snowflake.ml.model._signatures import (
     base_handler,
-    builtins_handler as builtins_handler,
+    builtins_handler,
     core,
     dmatrix_handler,
     numpy_handler,
@@ -48,7 +37,7 @@ FeatureGroupSpec = core.FeatureGroupSpec
 ModelSignature = core.ModelSignature
-_LOCAL_DATA_HANDLERS: List[Type[base_handler.BaseDataHandler[Any]]] = [
+_LOCAL_DATA_HANDLERS: list[type[base_handler.BaseDataHandler[Any]]] = [
     pandas_handler.PandasDataFrameHandler,
     numpy_handler.NumpyArrayHandler,
     builtins_handler.ListOfBuiltinHandler,
@@ -82,9 +71,9 @@ def _truncate_data(
             warnings.warn(
                 formatting.unwrap(
                     f"""
-                    The sample input has {row_count} rows, thus a truncation happened before inferring signature.
-                    This might cause inaccurate signature inference.
-                    If that happens, consider specifying signature manually.
+                    The sample input has {row_count} rows. Using the first 100 rows to define the inputs and outputs
+                    of the model and the data types of each. Use `signatures` parameter to specify model inputs and
+                    outputs manually if the automatic inference is not correct.
                     """
                 ),
                 category=UserWarning,
@@ -414,7 +403,7 @@ class SnowparkIdentifierRule(enum.Enum):
 def _get_dataframe_values_range(
     df: snowflake.snowpark.DataFrame,
-) -> Dict[str, Union[Tuple[int, int], Tuple[float, float]]]:
+) -> dict[str, Union[tuple[int, int], tuple[float, float]]]:
     columns = [
         F.array_construct(F.min(field.name), F.max(field.name)).as_(field.name)
         for field in df.schema.fields
@@ -429,7 +418,7 @@ def _get_dataframe_values_range(
             original_exception=ValueError(f"Unable to get the value range of fields {df.columns}"),
         )
     return cast(
-        Dict[str, Union[Tuple[int, int], Tuple[float, float]]],
+        dict[str, Union[tuple[int, int], tuple[float, float]]],
         {
             sql_identifier.SqlIdentifier(k, case_sensitive=True).identifier(): (json.loads(v)[0], json.loads(v)[1])
             for k, v in res[0].as_dict().items()
@@ -456,7 +445,7 @@ def _validate_snowpark_data(
         - inferred: signature `a` - Snowpark DF `"a"`, use `get_inferred_name`
         - normalized: signature `a` - Snowpark DF `A`, use `resolve_identifier`
     """
-    errors: Dict[SnowparkIdentifierRule, List[Exception]] = {
+    errors: dict[SnowparkIdentifierRule, list[Exception]] = {
         SnowparkIdentifierRule.INFERRED: [],
         SnowparkIdentifierRule.NORMALIZED: [],
     }
@@ -549,7 +538,7 @@ def _validate_snowpark_type_feature(
     field: spt.StructField,
     ft_type: DataType,
     ft_name: str,
-    value_range: Optional[Union[Tuple[int, int], Tuple[float, float]]],
+    value_range: Optional[Union[tuple[int, int], tuple[float, float]]],
     strict: bool = False,
 ) -> None:
     field_data_type = field.datatype
@@ -716,8 +705,8 @@ def _convert_and_validate_local_data(
 def infer_signature(
     input_data: model_types.SupportedLocalDataType,
     output_data: model_types.SupportedLocalDataType,
-    input_feature_names: Optional[List[str]] = None,
-    output_feature_names: Optional[List[str]] = None,
+    input_feature_names: Optional[list[str]] = None,
+    output_feature_names: Optional[list[str]] = None,
     input_data_limit: Optional[int] = 100,
     output_data_limit: Optional[int] = 100,
 ) -> core.ModelSignature:

snowflake/ml/model/models/huggingface_pipeline.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import warnings
-from typing import Any, Dict, Optional
+from typing import Any, Optional
 from packaging import version
@@ -13,7 +13,7 @@ class HuggingFacePipelineModel:
         revision: Optional[str] = None,
         token: Optional[str] = None,
         trust_remote_code: Optional[bool] = None,
-        model_kwargs: Optional[Dict[str, Any]] = None,
+        model_kwargs: Optional[dict[str, Any]] = None,
         **kwargs: Any,
     ) -> None:
         """
@@ -65,6 +65,7 @@ class HuggingFacePipelineModel:
             warnings.warn(
                 "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers.",
                 FutureWarning,
+                stacklevel=2,
             )
             if token is not None:
                 raise ValueError(
@@ -183,7 +184,8 @@ class HuggingFacePipelineModel:
             warnings.warn(
                 f"No model was supplied, defaulted to {model} and revision"
                 f" {revision} ({transformers.pipelines.HUGGINGFACE_CO_RESOLVE_ENDPOINT}/{model}).\n"
-                "Using a pipeline without specifying a model name and revision in production is not recommended."
+                "Using a pipeline without specifying a model name and revision in production is not recommended.",
+                stacklevel=2,
             )
             if config is None and isinstance(model, str):
                 config_obj = transformers.AutoConfig.from_pretrained(
@@ -200,7 +202,8 @@ class HuggingFacePipelineModel:
             if kwargs.get("device", None) is not None:
                 warnings.warn(
                     "Both `device` and `device_map` are specified. `device` will override `device_map`. You"
-                    " will most likely encounter unexpected behavior. Please remove `device` and keep `device_map`."
+                    " will most likely encounter unexpected behavior. Please remove `device` and keep `device_map`.",
+                    stacklevel=2,
                 )
         # ==== End pipeline logic from transformers ====

snowflake/ml/model/type_hints.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # mypy: disable-error-code="import"
 from enum import Enum
-from typing import TYPE_CHECKING, Dict, Literal, Sequence, TypedDict, TypeVar, Union
+from typing import TYPE_CHECKING, Literal, Sequence, TypedDict, TypeVar, Union
 import numpy.typing as npt
 from typing_extensions import NotRequired
@@ -32,7 +32,7 @@ _SupportedBuiltins = Union[
     bool,
     str,
     bytes,
-    Dict[str, Union["_SupportedBuiltins", "_SupportedBuiltinsList"]],
+    dict[str, Union["_SupportedBuiltins", "_SupportedBuiltinsList"]],
     "_SupportedBuiltinsList",
 ]
 _SupportedNumpyDtype = Union[
@@ -153,7 +153,7 @@ class BaseModelSaveOption(TypedDict):
     embed_local_ml_library: NotRequired[bool]
     relax_version: NotRequired[bool]
     function_type: NotRequired[Literal["FUNCTION", "TABLE_FUNCTION"]]
-    method_options: NotRequired[Dict[str, ModelMethodSaveOptions]]
+    method_options: NotRequired[dict[str, ModelMethodSaveOptions]]
     enable_explainability: NotRequired[bool]
     save_location: NotRequired[str]

snowflake/ml/modeling/_internal/estimator_utils.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import inspect
 import numbers
 import os
-from typing import Any, Callable, Dict, List, Set, Tuple
+from typing import Any, Callable
 import cloudpickle as cp
 import numpy as np
@@ -16,7 +16,7 @@ from snowflake.snowpark import Session
 from snowflake.snowpark._internal import utils as snowpark_utils
-def validate_sklearn_args(args: Dict[str, Tuple[Any, Any, bool]], klass: type) -> Dict[str, Any]:
+def validate_sklearn_args(args: dict[str, tuple[Any, Any, bool]], klass: type) -> dict[str, Any]:
     """Validate if all the keyword args are supported by current version of SKLearn/XGBoost object.
     Args:
@@ -71,7 +71,7 @@ def transform_snowml_obj_to_sklearn_obj(obj: Any) -> Any:
         return obj
-def gather_dependencies(obj: Any) -> Set[str]:
+def gather_dependencies(obj: Any) -> set[str]:
     """Gathers dependencies from the SnowML Estimator and Transformer objects.
     Args:
@@ -82,7 +82,7 @@ def gather_dependencies(obj: Any) -> Set[str]:
     """
     if isinstance(obj, list) or isinstance(obj, tuple):
-        deps: Set[str] = set()
+        deps: set[str] = set()
         for elem in obj:
             deps = deps | set(gather_dependencies(elem))
         return deps
@@ -167,8 +167,8 @@ def get_module_name(model: object) -> str:
 def handle_inference_result(
-    inference_res: Any, output_cols: List[str], inference_method: str, within_udf: bool = False
-) -> Tuple[npt.NDArray[Any], List[str]]:
+    inference_res: Any, output_cols: list[str], inference_method: str, within_udf: bool = False
+) -> tuple[npt.NDArray[Any], list[str]]:
     if isinstance(inference_res, list) and len(inference_res) > 0 and isinstance(inference_res[0], np.ndarray):
         # In case of multioutput estimators, predict_proba, decision_function etc., functions return a list of
         # ndarrays. We need to concatenate them.
@@ -248,7 +248,7 @@ def create_temp_stage(session: Session) -> str:
 def upload_model_to_stage(
-    stage_name: str, estimator: object, session: Session, statement_params: Dict[str, str]
+    stage_name: str, estimator: object, session: Session, statement_params: dict[str, str]
 ) -> str:
     """Util method to pickle and upload the model to a temp Snowflake stage.

snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import inspect
-from typing import Any, List, Optional
+from typing import Any, Optional
 import pandas as pd
@@ -38,9 +38,9 @@ class PandasTransformHandlers:
     def batch_inference(
         self,
         inference_method: str,
-        input_cols: List[str],
-        expected_output_cols: List[str],
-        snowpark_input_cols: Optional[List[str]] = None,
+        input_cols: list[str],
+        expected_output_cols: list[str],
+        snowpark_input_cols: Optional[list[str]] = None,
         drop_input_cols: Optional[bool] = False,
         *args: Any,
         **kwargs: Any,
@@ -147,8 +147,8 @@ class PandasTransformHandlers:
     def score(
         self,
-        input_cols: List[str],
-        label_cols: List[str],
+        input_cols: list[str],
+        label_cols: list[str],
         sample_weight_col: Optional[str],
         *args: Any,
         **kwargs: Any,

snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import inspect
-from typing import List, Optional, Tuple
+from typing import Optional
 import pandas as pd
@@ -15,8 +15,8 @@ class PandasModelTrainer:
         self,
         estimator: object,
         dataset: pd.DataFrame,
-        input_cols: List[str],
-        label_cols: Optional[List[str]],
+        input_cols: list[str],
+        label_cols: Optional[list[str]],
         sample_weight_col: Optional[str],
     ) -> None:
         """
@@ -57,10 +57,10 @@ class PandasModelTrainer:
     def train_fit_predict(
         self,
-        expected_output_cols_list: List[str],
+        expected_output_cols_list: list[str],
         drop_input_cols: Optional[bool] = False,
         example_output_pd_df: Optional[pd.DataFrame] = None,
-    ) -> Tuple[pd.DataFrame, object]:
+    ) -> tuple[pd.DataFrame, object]:
         """Trains the model using specified features and target columns from the dataset.
         This API is different from fit itself because it would also provide the predict
         output.
@@ -92,9 +92,9 @@ class PandasModelTrainer:
     def train_fit_transform(
         self,
-        expected_output_cols_list: List[str],
+        expected_output_cols_list: list[str],
         drop_input_cols: Optional[bool] = False,
-    ) -> Tuple[pd.DataFrame, object]:
+    ) -> tuple[pd.DataFrame, object]:
         """Trains the model using specified features and target columns from the dataset.
         This API is different from fit itself because it would also provide the transform
         output.

snowflake/ml/modeling/_internal/model_specifications.py CHANGED Viewed

@@ -1,5 +1,3 @@
-from typing import List
 import cloudpickle as cp
 import numpy as np
@@ -11,7 +9,7 @@ class ModelSpecifications:
     A dataclass to define model based specifications like required imports, and package dependencies for Sproc/Udfs.
     """
-    def __init__(self, imports: List[str], pkgDependencies: List[str]) -> None:
+    def __init__(self, imports: list[str], pkgDependencies: list[str]) -> None:
         self.imports = imports
         self.pkgDependencies = pkgDependencies
@@ -20,7 +18,7 @@ class SKLearnModelSpecifications(ModelSpecifications):
     def __init__(self) -> None:
         import sklearn
-        imports: List[str] = ["sklearn"]
+        imports: list[str] = ["sklearn"]
         # TODO(snandamuri): Replace cloudpickle with joblib after latest version of joblib is added to snowflake conda.
         pkgDependencies = [
             f"numpy=={np.__version__}",
@@ -56,8 +54,8 @@ class XGBoostModelSpecifications(ModelSpecifications):
         import sklearn
         import xgboost
-        imports: List[str] = ["xgboost"]
-        pkgDependencies: List[str] = [
+        imports: list[str] = ["xgboost"]
+        pkgDependencies: list[str] = [
             f"numpy=={np.__version__}",
             f"scikit-learn=={sklearn.__version__}",
             f"xgboost=={xgboost.__version__}",
@@ -71,8 +69,8 @@ class LightGBMModelSpecifications(ModelSpecifications):
         import lightgbm
         import sklearn
-        imports: List[str] = ["lightgbm"]
-        pkgDependencies: List[str] = [
+        imports: list[str] = ["lightgbm"]
+        pkgDependencies: list[str] = [
             f"numpy=={np.__version__}",
             f"scikit-learn=={sklearn.__version__}",
             f"lightgbm=={lightgbm.__version__}",
@@ -86,8 +84,8 @@ class SklearnModelSelectionModelSpecifications(ModelSpecifications):
         import sklearn
         import xgboost
-        imports: List[str] = ["sklearn", "xgboost"]
-        pkgDependencies: List[str] = [
+        imports: list[str] = ["sklearn", "xgboost"]
+        pkgDependencies: list[str] = [
             f"numpy=={np.__version__}",
             f"scikit-learn=={sklearn.__version__}",
             f"cloudpickle=={cp.__version__}",

snowflake/ml/modeling/_internal/model_trainer.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Optional, Protocol, Tuple, Union
+from typing import Optional, Protocol, Union
 import pandas as pd
@@ -18,15 +18,15 @@ class ModelTrainer(Protocol):
     def train_fit_predict(
         self,
-        expected_output_cols_list: List[str],
+        expected_output_cols_list: list[str],
         drop_input_cols: Optional[bool] = False,
         example_output_pd_df: Optional[pd.DataFrame] = None,
-    ) -> Tuple[Union[DataFrame, pd.DataFrame], object]:
+    ) -> tuple[Union[DataFrame, pd.DataFrame], object]:
         raise NotImplementedError
     def train_fit_transform(
         self,
-        expected_output_cols_list: List[str],
+        expected_output_cols_list: list[str],
         drop_input_cols: Optional[bool] = False,
-    ) -> Tuple[Union[DataFrame, pd.DataFrame], object]:
+    ) -> tuple[Union[DataFrame, pd.DataFrame], object]:
         raise NotImplementedError

snowflake/ml/modeling/_internal/model_trainer_builder.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Optional, Union
+from typing import Optional, Union
 import pandas as pd
 from sklearn import model_selection
@@ -71,8 +71,8 @@ class ModelTrainerBuilder:
         cls,
         estimator: object,
         dataset: Union[DataFrame, pd.DataFrame],
-        input_cols: Optional[List[str]] = None,
-        label_cols: Optional[List[str]] = None,
+        input_cols: Optional[list[str]] = None,
+        label_cols: Optional[list[str]] = None,
         sample_weight_col: Optional[str] = None,
         autogenerated: bool = False,
         subproject: str = "",
@@ -130,7 +130,7 @@ class ModelTrainerBuilder:
         cls,
         estimator: object,
         dataset: Union[DataFrame, pd.DataFrame],
-        input_cols: List[str],
+        input_cols: list[str],
         autogenerated: bool = False,
         subproject: str = "",
     ) -> ModelTrainer:
@@ -169,8 +169,8 @@ class ModelTrainerBuilder:
         cls,
         estimator: object,
         dataset: Union[DataFrame, pd.DataFrame],
-        input_cols: List[str],
-        label_cols: Optional[List[str]] = None,
+        input_cols: list[str],
+        label_cols: Optional[list[str]] = None,
         sample_weight_col: Optional[str] = None,
         autogenerated: bool = False,
         subproject: str = "",

snowflake-ml-python 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl

snowflake-ml-python 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl