snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +35 -40
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -8
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +27 -21
- snowflake/ml/model/_model_meta.py +33 -19
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +28 -15
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
- snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
- snowflake/ml/modeling/cluster/birch.py +79 -43
- snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
- snowflake/ml/modeling/cluster/dbscan.py +79 -43
- snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
- snowflake/ml/modeling/cluster/k_means.py +79 -43
- snowflake/ml/modeling/cluster/mean_shift.py +79 -43
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
- snowflake/ml/modeling/cluster/optics.py +79 -43
- snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
- snowflake/ml/modeling/compose/column_transformer.py +79 -43
- snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
- snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
- snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
- snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
- snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
- snowflake/ml/modeling/covariance/oas.py +79 -43
- snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
- snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
- snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
- snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
- snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/pca.py +79 -43
- snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
- snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
- snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
- snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
- snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
- snowflake/ml/modeling/impute/knn_imputer.py +79 -43
- snowflake/ml/modeling/impute/missing_indicator.py +79 -43
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/lars.py +79 -43
- snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
- snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/perceptron.py +79 -43
- snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ridge.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
- snowflake/ml/modeling/manifold/isomap.py +79 -43
- snowflake/ml/modeling/manifold/mds.py +79 -43
- snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
- snowflake/ml/modeling/manifold/tsne.py +79 -43
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
- snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
- snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
- snowflake/ml/modeling/svm/linear_svc.py +79 -43
- snowflake/ml/modeling/svm/linear_svr.py +79 -43
- snowflake/ml/modeling/svm/nu_svc.py +79 -43
- snowflake/ml/modeling/svm/nu_svr.py +79 -43
- snowflake/ml/modeling/svm/svc.py +79 -43
- snowflake/ml/modeling/svm/svr.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
- snowflake/ml/registry/model_registry.py +123 -121
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
--- snowflake/ml/model/model_signature.py (1.0.1)
+++ snowflake/ml/model/model_signature.py (1.0.3)
@@ -1,8 +1,10 @@
+import json
 import textwrap
 import warnings
 from abc import ABC, abstractmethod
 from enum import Enum
 from typing import (
+    TYPE_CHECKING,
     Any,
     Callable,
     Dict,
@@ -26,8 +28,14 @@ from typing_extensions import TypeGuard
 
 import snowflake.snowpark
 import snowflake.snowpark.types as spt
+from snowflake.ml._internal import type_utils
 from snowflake.ml._internal.utils import formatting, identifier
 from snowflake.ml.model import type_hints as model_types
+from snowflake.ml.model._deploy_client.warehouse import infer_template
+
+if TYPE_CHECKING:
+    import tensorflow
+    import torch
 
 
 class DataType(Enum):
@@ -36,22 +44,22 @@ class DataType(Enum):
         self._snowpark_type = snowpark_type
         self._numpy_type = numpy_type
 
-    INT8 = ("int8", spt.
-    INT16 = ("int16", spt.
+    INT8 = ("int8", spt.ByteType, np.int8)
+    INT16 = ("int16", spt.ShortType, np.int16)
     INT32 = ("int32", spt.IntegerType, np.int32)
-    INT64 = ("int64", spt.
+    INT64 = ("int64", spt.LongType, np.int64)
 
     FLOAT = ("float", spt.FloatType, np.float32)
     DOUBLE = ("double", spt.DoubleType, np.float64)
 
-    UINT8 = ("uint8", spt.
-    UINT16 = ("uint16", spt.
+    UINT8 = ("uint8", spt.ByteType, np.uint8)
+    UINT16 = ("uint16", spt.ShortType, np.uint16)
     UINT32 = ("uint32", spt.IntegerType, np.uint32)
-    UINT64 = ("uint64", spt.
+    UINT64 = ("uint64", spt.LongType, np.uint64)
 
-    BOOL = ("bool", spt.BooleanType, np.
-    STRING = ("string", spt.StringType, np.
-    BYTES = ("bytes", spt.BinaryType, np.
+    BOOL = ("bool", spt.BooleanType, np.bool_)
+    STRING = ("string", spt.StringType, np.str_)
+    BYTES = ("bytes", spt.BinaryType, np.bytes_)
 
     def as_snowpark_type(self) -> spt.DataType:
         """Convert to corresponding Snowpark Type.
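
The remapped enum now resolves each integer width to its exact Snowpark type (ByteType/ShortType/LongType) rather than a catch-all. A minimal sketch of the resulting round trip, assuming these names are importable from `snowflake.ml.model.model_signature` as this diff shows:

    import numpy as np
    from snowflake.ml.model.model_signature import DataType

    dt = DataType.from_numpy_type(np.dtype(np.int8))
    print(dt)                     # DataType.INT8
    print(dt.as_snowpark_type())  # ByteType() after this change, not a generic integer type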
@@ -84,6 +92,30 @@ class DataType(Enum):
             return np_to_snowml_type_mapping[potential_type]
         raise NotImplementedError(f"Type {np_type} is not supported as a DataType.")
 
+    @classmethod
+    def from_torch_type(cls, torch_type: "torch.dtype") -> "DataType":
+        import torch
+
+        """Translate torch dtype to DataType for signature definition.
+
+        Args:
+            torch_type: The torch dtype.
+
+        Returns:
+            Corresponding DataType.
+        """
+        torch_dtype_to_numpy_dtype_mapping = {
+            torch.uint8: np.uint8,
+            torch.int8: np.int8,
+            torch.int16: np.int16,
+            torch.int32: np.int32,
+            torch.int64: np.int64,
+            torch.float32: np.float32,
+            torch.float64: np.float64,
+            torch.bool: np.bool_,
+        }
+        return cls.from_numpy_type(torch_dtype_to_numpy_dtype_mapping[torch_type])
+
     @classmethod
     def from_snowpark_type(cls, snowpark_type: spt.DataType) -> "DataType":
         """Translate snowpark type to DataType for signature definition.
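
The new `from_torch_type` classmethod simply bridges torch dtypes through the numpy mapping above. A usage sketch (assumes `torch` is installed alongside the package; names as in this diff):

    import torch
    from snowflake.ml.model.model_signature import DataType

    assert DataType.from_torch_type(torch.float32) == DataType.FLOAT
    assert DataType.from_torch_type(torch.int64) == DataType.INT64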
@@ -97,30 +129,45 @@ class DataType(Enum):
         Returns:
             Corresponding DataType.
         """
+        if isinstance(snowpark_type, spt.ArrayType):
+            actual_sp_type = snowpark_type.element_type
+        else:
+            actual_sp_type = snowpark_type
+
         snowpark_to_snowml_type_mapping: Dict[Type[spt.DataType], DataType] = {
-
-
+            i._snowpark_type: i
+            for i in DataType
+            # We by default infer as signed integer.
+            if i not in [DataType.UINT8, DataType.UINT16, DataType.UINT32, DataType.UINT64]
         }
         for potential_type in snowpark_to_snowml_type_mapping.keys():
-            if isinstance(
+            if isinstance(actual_sp_type, potential_type):
                 return snowpark_to_snowml_type_mapping[potential_type]
+        # Fallback for decimal type.
+        if isinstance(snowpark_type, spt.DecimalType):
+            if snowpark_type.scale == 0:
+                return DataType.INT64
         raise NotImplementedError(f"Type {snowpark_type} is not supported as a DataType.")
 
     def is_same_snowpark_type(self, incoming_snowpark_type: spt.DataType) -> bool:
         """Check if provided snowpark type is the same as Data Type.
-        Since for Snowflake all integer types are same, thus when datatype is a integer type, the incoming snowpark
-        type can be any type inherit from _IntegralType.
 
         Args:
             incoming_snowpark_type: The snowpark type.
 
+        Raises:
+            NotImplementedError: Raised when the given numpy type is not supported.
+
         Returns:
             If the provided snowpark type is the same as the DataType.
         """
-
-
-
-
+        # Special handle for Decimal Type.
+        if isinstance(incoming_snowpark_type, spt.DecimalType):
+            if incoming_snowpark_type.scale == 0:
+                return self == DataType.INT64 or self == DataType.UINT64
+            raise NotImplementedError(f"Type {incoming_snowpark_type} is not supported as a DataType.")
+
+        return isinstance(incoming_snowpark_type, self._snowpark_type)
 
 
 class BaseFeatureSpec(ABC):
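
With the new fallback, scale-0 NUMBER columns (Snowpark `DecimalType`) are accepted as 64-bit integers instead of being rejected. A sketch of the behavior, under the same import assumption as above:

    import snowflake.snowpark.types as spt
    from snowflake.ml.model.model_signature import DataType

    assert DataType.from_snowpark_type(spt.DecimalType(38, 0)) == DataType.INT64
    assert DataType.INT64.is_same_snowpark_type(spt.DecimalType(10, 0))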
@@ -174,9 +221,19 @@ class FeatureSpec(BaseFeatureSpec):
                 (2,): 1d list with fixed len of 2.
                 (-1,): 1d list with variable length. Used for ragged tensor representation.
                 (d1, d2, d3): 3d tensor.
+
+        Raises:
+            TypeError: Raised when the dtype input type is incorrect.
+            TypeError: Raised when the shape input type is incorrect.
         """
         super().__init__(name=name)
+
+        if not isinstance(dtype, DataType):
+            raise TypeError("dtype should be a model signature datatype.")
         self._dtype = dtype
+
+        if shape and not isinstance(shape, tuple):
+            raise TypeError("Shape should be a tuple if presented.")
         self._shape = shape
 
     def as_snowpark_type(self) -> spt.DataType:
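
FeatureSpec now rejects malformed arguments up front instead of failing later during serialization. For illustration only (names as in this diff):

    from snowflake.ml.model.model_signature import DataType, FeatureSpec

    ok = FeatureSpec(name="embedding", dtype=DataType.FLOAT, shape=(128,))
    try:
        FeatureSpec(name="bad", dtype="float32")  # no longer silently accepted
    except TypeError as e:
        print(e)  # dtype should be a model signature datatype.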
@@ -191,7 +248,7 @@ class FeatureSpec(BaseFeatureSpec):
         """Convert to corresponding local Type."""
         if not self._shape:
             return self._dtype._numpy_type
-        return np.
+        return np.object_
 
     def __eq__(self, other: object) -> bool:
         if isinstance(other, FeatureSpec):
@@ -229,6 +286,8 @@ class FeatureSpec(BaseFeatureSpec):
         """
         name = input_dict["name"]
         shape = input_dict.get("shape", None)
+        if shape:
+            shape = tuple(shape)
         type = DataType[input_dict["type"]]
         return FeatureSpec(name=name, dtype=type, shape=shape)
 
@@ -421,7 +480,7 @@ class _BaseDataHandler(ABC, Generic[model_types._DataType]):
 
     @staticmethod
     @abstractmethod
-    def convert_to_df(data: model_types._DataType) ->
+    def convert_to_df(data: model_types._DataType, ensure_serializable: bool = True) -> pd.DataFrame:
         ...
 
 
@@ -454,7 +513,7 @@ class _PandasDataFrameHandler(_BaseDataHandler[pd.DataFrame]):
             np.int64,
             np.uint64,
             np.float64,
-            np.
+            np.object_,
         ]:  # To keep compatibility with Pandas 2.x and 1.x
             raise ValueError("Data Validation Error: Unsupported column index type is found.")
 
@@ -538,7 +597,17 @@ class _PandasDataFrameHandler(_BaseDataHandler[pd.DataFrame]):
         return specs
 
     @staticmethod
-    def convert_to_df(data: pd.DataFrame) -> pd.DataFrame:
+    def convert_to_df(data: pd.DataFrame, ensure_serializable: bool = True) -> pd.DataFrame:
+        if not ensure_serializable:
+            return data
+        # This convert is necessary since numpy dataframe cannot be correctly handled when provided as an element of
+        # a list when creating Snowpark Dataframe.
+        df_cols = data.columns
+        df_col_dtypes = [data[col].dtype for col in data.columns]
+        for df_col, df_col_dtype in zip(df_cols, df_col_dtypes):
+            if df_col_dtype == np.dtype("O"):
+                if isinstance(data[df_col][0], np.ndarray):
+                    data[df_col] = data[df_col].map(np.ndarray.tolist)
         return data
 
 
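
The `ensure_serializable` flag added across the handlers controls whether object cells holding numpy arrays are flattened to plain lists, which is needed before the data can feed a Snowpark DataFrame or UDF. A sketch against the pandas handler (a private class, shown only to clarify the flag):

    import numpy as np
    import pandas as pd
    from snowflake.ml.model.model_signature import _PandasDataFrameHandler

    df = pd.DataFrame({"a": [np.array([1, 2]), np.array([3, 4])]})
    out = _PandasDataFrameHandler.convert_to_df(df)  # default ensure_serializable=True
    print(type(out["a"][0]))  # <class 'list'>; with ensure_serializable=False the ndarray survives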
@@ -569,7 +638,7 @@ class _NumpyArrayHandler(_BaseDataHandler[model_types._SupportedNumpyArray]):
     def infer_signature(
         data: model_types._SupportedNumpyArray, role: Literal["input", "output"]
     ) -> Sequence[BaseFeatureSpec]:
-        feature_prefix = f"{
+        feature_prefix = f"{_NumpyArrayHandler.FEATURE_PREFIX}_"
         dtype = DataType.from_numpy_type(data.dtype)
         role_prefix = (_NumpyArrayHandler.INPUT_PREFIX if role == "input" else _NumpyArrayHandler.OUTPUT_PREFIX) + "_"
         if len(data.shape) == 1:
@@ -588,68 +657,269 @@ class _NumpyArrayHandler(_BaseDataHandler[model_types._SupportedNumpyArray]):
         return features
 
     @staticmethod
-    def convert_to_df(data: model_types._SupportedNumpyArray) -> pd.DataFrame:
+    def convert_to_df(data: model_types._SupportedNumpyArray, ensure_serializable: bool = True) -> pd.DataFrame:
         if len(data.shape) == 1:
             data = np.expand_dims(data, axis=1)
         n_cols = data.shape[1]
         if len(data.shape) == 2:
-            return pd.DataFrame(data
+            return pd.DataFrame(data)
         else:
             n_rows = data.shape[0]
-
+            if ensure_serializable:
+                return pd.DataFrame(data={i: [data[k, i].tolist() for k in range(n_rows)] for i in range(n_cols)})
+            return pd.DataFrame(data={i: [list(data[k, i]) for k in range(n_rows)] for i in range(n_cols)})
 
 
-class
+class _SeqOfNumpyArrayHandler(_BaseDataHandler[Sequence[model_types._SupportedNumpyArray]]):
     @staticmethod
-    def can_handle(data: model_types.SupportedDataType) -> TypeGuard[
-
-
-
-
-    )
+    def can_handle(data: model_types.SupportedDataType) -> TypeGuard[Sequence[model_types._SupportedNumpyArray]]:
+        if not isinstance(data, list):
+            return False
+        if len(data) == 0:
+            return False
+        if isinstance(data[0], np.ndarray):
+            return all(isinstance(data_col, np.ndarray) for data_col in data)
+        return False
 
     @staticmethod
-    def count(data:
+    def count(data: Sequence[model_types._SupportedNumpyArray]) -> int:
         return min(_NumpyArrayHandler.count(data_col) for data_col in data)
 
     @staticmethod
-    def truncate(data:
+    def truncate(data: Sequence[model_types._SupportedNumpyArray]) -> Sequence[model_types._SupportedNumpyArray]:
         return [
-            data_col[: min(
+            data_col[: min(_SeqOfNumpyArrayHandler.count(data), _SeqOfNumpyArrayHandler.SIG_INFER_ROWS_COUNT_LIMIT)]
             for data_col in data
         ]
 
     @staticmethod
-    def validate(data:
+    def validate(data: Sequence[model_types._SupportedNumpyArray]) -> None:
         for data_col in data:
             _NumpyArrayHandler.validate(data_col)
 
     @staticmethod
     def infer_signature(
-        data:
+        data: Sequence[model_types._SupportedNumpyArray], role: Literal["input", "output"]
     ) -> Sequence[BaseFeatureSpec]:
+        feature_prefix = f"{_SeqOfNumpyArrayHandler.FEATURE_PREFIX}_"
         features: List[BaseFeatureSpec] = []
         role_prefix = (
-
+            _SeqOfNumpyArrayHandler.INPUT_PREFIX if role == "input" else _SeqOfNumpyArrayHandler.OUTPUT_PREFIX
         ) + "_"
 
         for i, data_col in enumerate(data):
-
-
-
-
+            dtype = DataType.from_numpy_type(data_col.dtype)
+            ft_name = f"{role_prefix}{feature_prefix}{i}"
+            if len(data_col.shape) == 1:
+                features.append(FeatureSpec(dtype=dtype, name=ft_name))
+            else:
+                ft_shape = tuple(data_col.shape[1:])
+                features.append(FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape))
         return features
 
     @staticmethod
-    def convert_to_df(
-
+    def convert_to_df(
+        data: Sequence[model_types._SupportedNumpyArray], ensure_serializable: bool = True
+    ) -> pd.DataFrame:
+        if ensure_serializable:
+            return pd.DataFrame(data={i: data_col.tolist() for i, data_col in enumerate(data)})
+        return pd.DataFrame(data={i: list(data_col) for i, data_col in enumerate(data)})
+
+
+class _SeqOfPyTorchTensorHandler(_BaseDataHandler[Sequence["torch.Tensor"]]):
+    @staticmethod
+    def can_handle(data: model_types.SupportedDataType) -> TypeGuard[Sequence["torch.Tensor"]]:
+        if not isinstance(data, list):
+            return False
+        if len(data) == 0:
+            return False
+        if type_utils.LazyType("torch.Tensor").isinstance(data[0]):
+            return all(type_utils.LazyType("torch.Tensor").isinstance(data_col) for data_col in data)
+        return False
+
+    @staticmethod
+    def count(data: Sequence["torch.Tensor"]) -> int:
+        return min(data_col.shape[0] for data_col in data)
+
+    @staticmethod
+    def truncate(data: Sequence["torch.Tensor"]) -> Sequence["torch.Tensor"]:
+        return [
+            data_col[
+                : min(_SeqOfPyTorchTensorHandler.count(data), _SeqOfPyTorchTensorHandler.SIG_INFER_ROWS_COUNT_LIMIT)
+            ]
+            for data_col in data
+        ]
+
+    @staticmethod
+    def validate(data: Sequence["torch.Tensor"]) -> None:
+        import torch
+
+        for data_col in data:
+            if data_col.shape == torch.Size([0]):
+                # Empty array
+                raise ValueError("Data Validation Error: Empty data is found.")
+
+            if data_col.shape == torch.Size([1]):
+                # scalar
+                raise ValueError("Data Validation Error: Scalar data is found.")
+
+    @staticmethod
+    def infer_signature(data: Sequence["torch.Tensor"], role: Literal["input", "output"]) -> Sequence[BaseFeatureSpec]:
+        feature_prefix = f"{_SeqOfPyTorchTensorHandler.FEATURE_PREFIX}_"
+        features: List[BaseFeatureSpec] = []
+        role_prefix = (
+            _SeqOfPyTorchTensorHandler.INPUT_PREFIX if role == "input" else _SeqOfPyTorchTensorHandler.OUTPUT_PREFIX
+        ) + "_"
+
+        for i, data_col in enumerate(data):
+            dtype = DataType.from_torch_type(data_col.dtype)
+            ft_name = f"{role_prefix}{feature_prefix}{i}"
+            if len(data_col.shape) == 1:
+                features.append(FeatureSpec(dtype=dtype, name=ft_name))
+            else:
+                ft_shape = tuple(data_col.shape[1:])
+                features.append(FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape))
+        return features
+
+    @staticmethod
+    def convert_to_df(data: Sequence["torch.Tensor"], ensure_serializable: bool = True) -> pd.DataFrame:
+        # Use list(...) instead of .tolist() to ensure that
+        # the content is still numpy array so that the type could be preserved.
+        # But that would not serializable and cannot use as UDF input and output.
+        if ensure_serializable:
+            return pd.DataFrame({i: data_col.detach().to("cpu").numpy().tolist() for i, data_col in enumerate(data)})
+        return pd.DataFrame({i: list(data_col.detach().to("cpu").numpy()) for i, data_col in enumerate(data)})
+
+    @staticmethod
+    def convert_from_df(
+        df: pd.DataFrame, features: Optional[Sequence[BaseFeatureSpec]] = None
+    ) -> Sequence["torch.Tensor"]:
+        import torch
+
+        res = []
+        if features:
+            for feature in features:
+                if isinstance(feature, FeatureGroupSpec):
+                    raise NotImplementedError("FeatureGroupSpec is not supported.")
+                assert isinstance(feature, FeatureSpec), "Invalid feature kind."
+                res.append(torch.from_numpy(np.stack(df[feature.name].to_numpy()).astype(feature._dtype._numpy_type)))
+            return res
+        return [torch.from_numpy(np.stack(df[col].to_numpy())) for col in df]
+
+
+class _SeqOfTensorflowTensorHandler(_BaseDataHandler[Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]]):
+    @staticmethod
+    def can_handle(
+        data: model_types.SupportedDataType,
+    ) -> TypeGuard[Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]]:
+        if not isinstance(data, list):
+            return False
+        if len(data) == 0:
+            return False
+        if type_utils.LazyType("tensorflow.Tensor").isinstance(data[0]) or type_utils.LazyType(
+            "tensorflow.Variable"
+        ).isinstance(data[0]):
+            return all(
+                type_utils.LazyType("tensorflow.Tensor").isinstance(data_col)
+                or type_utils.LazyType("tensorflow.Variable").isinstance(data_col)
+                for data_col in data
+            )
+        return False
+
+    @staticmethod
+    def count(data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]) -> int:
+        import tensorflow as tf
+
+        rows = []
+        for data_col in data:
+            shapes = data_col.shape.as_list()
+            if data_col.shape == tf.TensorShape(None) or (not shapes) or (shapes[0] is None):
+                # Unknown shape array
+                raise ValueError("Data Validation Error: Unknown shape data is found.")
+            # Make mypy happy
+            assert isinstance(shapes[0], int)
+
+            rows.append(shapes[0])
+
+        return min(rows)
+
+    @staticmethod
+    def truncate(
+        data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]
+    ) -> Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]:
+        return [
+            data_col[
+                : min(
+                    _SeqOfTensorflowTensorHandler.count(data), _SeqOfTensorflowTensorHandler.SIG_INFER_ROWS_COUNT_LIMIT
+                )
+            ]
+            for data_col in data
+        ]
+
+    @staticmethod
+    def validate(data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]) -> None:
+        import tensorflow as tf
+
         for data_col in data:
+            if data_col.shape == tf.TensorShape(None) or any(dim is None for dim in data_col.shape.as_list()):
+                # Unknown shape array
+                raise ValueError("Data Validation Error: Unknown shape data is found.")
+
+            if data_col.shape == tf.TensorShape([0]):
+                # Empty array
+                raise ValueError("Data Validation Error: Empty data is found.")
+
+            if data_col.shape == tf.TensorShape([1]) or data_col.shape == tf.TensorShape([]):
+                # scalar
+                raise ValueError("Data Validation Error: Scalar data is found.")
+
+    @staticmethod
+    def infer_signature(
+        data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]], role: Literal["input", "output"]
+    ) -> Sequence[BaseFeatureSpec]:
+        feature_prefix = f"{_SeqOfTensorflowTensorHandler.FEATURE_PREFIX}_"
+        features: List[BaseFeatureSpec] = []
+        role_prefix = (
+            _SeqOfTensorflowTensorHandler.INPUT_PREFIX
+            if role == "input"
+            else _SeqOfTensorflowTensorHandler.OUTPUT_PREFIX
+        ) + "_"
+
+        for i, data_col in enumerate(data):
+            dtype = DataType.from_numpy_type(data_col.dtype.as_numpy_dtype)
+            ft_name = f"{role_prefix}{feature_prefix}{i}"
             if len(data_col.shape) == 1:
-
+                features.append(FeatureSpec(dtype=dtype, name=ft_name))
             else:
-
-
-        return
+                ft_shape = tuple(data_col.shape[1:])
+                features.append(FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape))
+        return features
+
+    @staticmethod
+    def convert_to_df(
+        data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]], ensure_serializable: bool = True
+    ) -> pd.DataFrame:
+        if ensure_serializable:
+            return pd.DataFrame({i: data_col.numpy().tolist() for i, data_col in enumerate(iterable=data)})
+        return pd.DataFrame({i: list(data_col.numpy()) for i, data_col in enumerate(iterable=data)})
+
+    @staticmethod
+    def convert_from_df(
+        df: pd.DataFrame, features: Optional[Sequence[BaseFeatureSpec]] = None
+    ) -> Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]:
+        import tensorflow as tf
+
+        res = []
+        if features:
+            for feature in features:
+                if isinstance(feature, FeatureGroupSpec):
+                    raise NotImplementedError("FeatureGroupSpec is not supported.")
+                assert isinstance(feature, FeatureSpec), "Invalid feature kind."
+                res.append(
+                    tf.convert_to_tensor(np.stack(df[feature.name].to_numpy()).astype(feature._dtype._numpy_type))
+                )
+            return res
+        return [tf.convert_to_tensor(np.stack(df[col].to_numpy())) for col in df]
 
 
 class _ListOfBuiltinHandler(_BaseDataHandler[model_types._SupportedBuiltinsList]):
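
Together with the renamed `_SeqOfNumpyArrayHandler`, the two new handlers let signature inference run directly on lists of framework tensors. A hypothetical walk-through of the PyTorch path (private classes, `torch` assumed installed):

    import torch
    from snowflake.ml.model.model_signature import _SeqOfPyTorchTensorHandler

    data = [torch.rand(4, 3), torch.randint(0, 10, (4,))]
    assert _SeqOfPyTorchTensorHandler.can_handle(data)
    for ft in _SeqOfPyTorchTensorHandler.infer_signature(data, role="input"):
        print(ft)  # e.g. a FLOAT feature with shape (3,) and a 1-d INT64 feature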
@@ -684,7 +954,10 @@ class _ListOfBuiltinHandler(_BaseDataHandler[model_types._SupportedBuiltinsList]
         return _PandasDataFrameHandler.infer_signature(pd.DataFrame(data), role)
 
     @staticmethod
-    def convert_to_df(
+    def convert_to_df(
+        data: model_types._SupportedBuiltinsList,
+        ensure_serializable: bool = True,
+    ) -> pd.DataFrame:
         return pd.DataFrame(data)
 
 
@@ -705,7 +978,12 @@ class _SnowparkDataFrameHandler(_BaseDataHandler[snowflake.snowpark.DataFrame]):
     def validate(data: snowflake.snowpark.DataFrame) -> None:
         schema = data.schema
         for field in schema.fields:
-
+            data_type = field.datatype
+            if isinstance(data_type, spt.ArrayType):
+                actual_data_type = data_type.element_type
+            else:
+                actual_data_type = data_type
+            if not any(type.is_same_snowpark_type(actual_data_type) for type in DataType):
                 raise ValueError(
                     f"Data Validation Error: Unsupported data type {field.datatype} in column {field.name}."
                 )
@@ -718,19 +996,91 @@ class _SnowparkDataFrameHandler(_BaseDataHandler[snowflake.snowpark.DataFrame]):
         schema = data.schema
         for field in schema.fields:
             name = identifier.get_unescaped_names(field.name)
-
+            if isinstance(field.datatype, spt.ArrayType):
+                raise NotImplementedError("Cannot infer model signature from Snowpark DataFrame with Array Type.")
+            else:
+                features.append(FeatureSpec(name=name, dtype=DataType.from_snowpark_type(field.datatype)))
         return features
 
     @staticmethod
-    def convert_to_df(
-
+    def convert_to_df(
+        data: snowflake.snowpark.DataFrame,
+        ensure_serializable: bool = True,
+        features: Optional[Sequence[BaseFeatureSpec]] = None,
+    ) -> pd.DataFrame:
+        # This method do things on top of to_pandas, to make sure the local dataframe got is in correct shape.
+        dtype_map = {}
+        if features:
+            for feature in features:
+                if isinstance(feature, FeatureGroupSpec):
+                    raise NotImplementedError("FeatureGroupSpec is not supported.")
+                assert isinstance(feature, FeatureSpec), "Invalid feature kind."
+                dtype_map[feature.name] = feature.as_dtype()
+        df_local = data.to_pandas()
+        # This is because Array will become string (Even though the correct schema is set)
+        # and object will become variant type and requires an additional loads
+        # to get correct data otherwise it would be string.
+        for field in data.schema.fields:
+            if isinstance(field.datatype, spt.ArrayType):
+                df_local[identifier.get_unescaped_names(field.name)] = df_local[
+                    identifier.get_unescaped_names(field.name)
+                ].map(json.loads)
+        # Only when the feature is not from inference, we are confident to do the type casting.
+        # Otherwise, dtype_map will be empty
+        df_local = df_local.astype(dtype=dtype_map)
+        return df_local
+
+    @staticmethod
+    def convert_from_df(
+        session: snowflake.snowpark.Session, df: pd.DataFrame, keep_order: bool = True
+    ) -> snowflake.snowpark.DataFrame:
+        # This method is necessary to create the Snowpark Dataframe in correct schema.
+        # Snowpark ignore the schema argument when providing a pandas DataFrame.
+        # However, in this case, if a cell of the original Dataframe is some array type,
+        # they will be inferred as VARIANT.
+        # To make sure Snowpark get the correct schema, we have to provide in a list of records.
+        # However, in this case, the order could not be preserved. Thus, a _ID column has to be added,
+        # if keep_order is True.
+        # Although in this case, the column with array type can get correct ARRAY type, however, the element
+        # type is not preserved, and will become string type. This affect the implementation of convert_from_df.
+        df = _PandasDataFrameHandler.convert_to_df(df)
+        df_cols = df.columns
+        if df_cols.dtype != np.object_:
+            raise ValueError("Cannot convert a Pandas DataFrame whose column index is not a string")
+        features = _PandasDataFrameHandler.infer_signature(df, role="input")
+        # Role will be no effect on the column index. That is to say, the feature name is the actual column name.
+        schema_list = []
+        for feature in features:
+            if isinstance(feature, FeatureGroupSpec):
+                raise NotImplementedError("FeatureGroupSpec is not supported.")
+            assert isinstance(feature, FeatureSpec), "Invalid feature kind."
+            schema_list.append(
+                spt.StructField(
+                    identifier.get_inferred_name(feature.name),
+                    feature.as_snowpark_type(),
+                    nullable=df[feature.name].isnull().any(),
+                )
+            )
+
+        data = df.rename(columns=identifier.get_inferred_name).to_dict("records")
+        if keep_order:
+            for idx, data_item in enumerate(data):
+                data_item[infer_template._KEEP_ORDER_COL_NAME] = idx
+            schema_list.append(spt.StructField(infer_template._KEEP_ORDER_COL_NAME, spt.LongType(), nullable=False))
+        sp_df = session.create_dataframe(
+            data,  # To make sure the schema can be used, otherwise, array will become variant.
+            spt.StructType(schema_list),
+        )
+        return sp_df
 
 
 _LOCAL_DATA_HANDLERS: List[Type[_BaseDataHandler[Any]]] = [
     _PandasDataFrameHandler,
     _NumpyArrayHandler,
-    _ListOfNumpyArrayHandler,
     _ListOfBuiltinHandler,
+    _SeqOfNumpyArrayHandler,
+    _SeqOfPyTorchTensorHandler,
+    _SeqOfTensorflowTensorHandler,
 ]
 _ALL_DATA_HANDLERS = _LOCAL_DATA_HANDLERS + [_SnowparkDataFrameHandler]
 
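
The new `convert_from_df` builds the Snowpark DataFrame from a list of records plus an explicit `StructType`, so ARRAY columns keep their type, and optionally appends a row-index column so order can be restored after inference. A sketch (requires a live `snowflake.snowpark.Session`, represented here by a placeholder):

    import pandas as pd
    import snowflake.snowpark
    from snowflake.ml.model.model_signature import _SnowparkDataFrameHandler

    session: snowflake.snowpark.Session = ...  # an established session (placeholder)
    df = pd.DataFrame({"x": [1, 2], "emb": [[0.1, 0.2], [0.3, 0.4]]})
    sp_df = _SnowparkDataFrameHandler.convert_from_df(session, df, keep_order=True)
    # sp_df carries an extra order column named by infer_template._KEEP_ORDER_COL_NAME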
@@ -1007,22 +1357,36 @@ def _validate_snowpark_data(data: snowflake.snowpark.DataFrame, features: Sequen
                 raise NotImplementedError("FeatureGroupSpec is not supported.")
                 assert isinstance(feature, FeatureSpec), "Invalid feature kind."
                 ft_type = feature._dtype
-
-
-
-
+                field_data_type = field.datatype
+                if isinstance(field_data_type, spt.ArrayType):
+                    if feature._shape is None:
+                        raise ValueError(
+                            f"Data Validation Error in feature {ft_name}: "
+                            + f"Feature is a array feature, while {field.name} is not."
+                        )
+                    warnings.warn(
+                        f"Warn in feature {ft_name}: Feature is a array feature," + " type validation cannot happen.",
+                        category=RuntimeWarning,
                     )
+                else:
+                    if feature._shape:
+                        raise ValueError(
+                            f"Data Validation Error in feature {ft_name}: "
+                            + f"Feature is a scalar feature, while {field.name} is not."
+                        )
+                    if not ft_type.is_same_snowpark_type(field_data_type):
+                        raise ValueError(
+                            f"Data Validation Error in feature {ft_name}: "
+                            + f"Feature type {ft_type} is not met by column {field.name}."
+                        )
         if not found:
             raise ValueError(f"Data Validation Error: feature {ft_name} does not exist in data.")
 
 
-def
-data
-) -> pd.DataFrame:
-    """Validate the data with features in model signature and convert to DataFrame
+def _convert_local_data_to_df(data: model_types.SupportedLocalDataType) -> pd.DataFrame:
+    """Convert local data to pandas DataFrame or Snowpark DataFrame
 
     Args:
-        features: A list of feature specs that the data should follow.
         data: The provided data.
 
     Raises:
@@ -1035,13 +1399,29 @@ def _convert_and_validate_local_data(
     for handler in _LOCAL_DATA_HANDLERS:
         if handler.can_handle(data):
             handler.validate(data)
-            df = handler.convert_to_df(data)
+            df = handler.convert_to_df(data, ensure_serializable=False)
             break
     if df is None:
         raise ValueError(f"Data Validation Error: Un-supported type {type(data)} provided.")
-
+    return df
+
+
+def _convert_and_validate_local_data(
+    data: model_types.SupportedLocalDataType, features: Sequence[BaseFeatureSpec]
+) -> pd.DataFrame:
+    """Validate the data with features in model signature and convert to DataFrame
+
+    Args:
+        features: A list of feature specs that the data should follow.
+        data: The provided data.
+
+    Returns:
+        The converted dataframe with renamed column index.
+    """
+    df = _convert_local_data_to_df(data)
     df = _rename_pandas_df(df, features)
     _validate_pandas_df(df, features)
+    df = _PandasDataFrameHandler.convert_to_df(df, ensure_serializable=True)
 
     return df
 
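
Net effect of this last refactor: conversion is factored out into `_convert_local_data_to_df`, and the serializable pass happens once, after renaming and validation. An end-to-end sketch using the private helpers shown above (illustration only):

    import numpy as np
    from snowflake.ml.model import model_signature as ms

    data = [np.array([1.0, 2.0]), np.array([3.0, 4.0])]
    features = ms._SeqOfNumpyArrayHandler.infer_signature(data, role="input")
    df = ms._convert_and_validate_local_data(data, features)
    print(df.columns.tolist())  # columns renamed to the inferred feature names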