PyPI - snowflake-ml-python - Versions diffs - 1.4.1__py3-none-any.whl → 1.5.1__py3-none-any.whl - Mend

snowflake-ml-python 1.4.1 (py3-none-any.whl) → 1.5.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (218) hide show

snowflake/ml/_internal/env_utils.py +72 -31
snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
snowflake/ml/_internal/exceptions/error_codes.py +3 -0
snowflake/ml/_internal/lineage/data_source.py +10 -0
snowflake/ml/_internal/lineage/lineage_utils.py +95 -0
snowflake/ml/_internal/telemetry.py +1 -0
snowflake/ml/_internal/utils/identifier.py +1 -1
snowflake/ml/_internal/utils/sql_identifier.py +14 -1
snowflake/ml/dataset/__init__.py +11 -0
snowflake/ml/dataset/dataset.py +455 -129
snowflake/ml/dataset/dataset_factory.py +53 -0
snowflake/ml/dataset/dataset_metadata.py +103 -0
snowflake/ml/dataset/dataset_reader.py +199 -0
snowflake/ml/feature_store/__init__.py +6 -0
snowflake/ml/feature_store/access_manager.py +279 -0
snowflake/ml/feature_store/feature_store.py +544 -358
snowflake/ml/feature_store/feature_view.py +55 -16
snowflake/ml/fileset/embedded_stage_fs.py +149 -0
snowflake/ml/fileset/sfcfs.py +0 -4
snowflake/ml/fileset/snowfs.py +160 -0
snowflake/ml/fileset/stage_fs.py +25 -10
snowflake/ml/model/__init__.py +2 -2
snowflake/ml/model/_api.py +16 -1
snowflake/ml/model/_client/model/model_impl.py +65 -31
snowflake/ml/model/_client/model/model_version_impl.py +159 -2
snowflake/ml/model/_client/ops/metadata_ops.py +27 -4
snowflake/ml/model/_client/ops/model_ops.py +268 -83
snowflake/ml/model/_client/sql/_base.py +34 -0
snowflake/ml/model/_client/sql/model.py +42 -47
snowflake/ml/model/_client/sql/model_version.py +164 -39
snowflake/ml/model/_client/sql/stage.py +6 -32
snowflake/ml/model/_client/sql/tag.py +32 -56
snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
snowflake/ml/model/_model_composer/model_composer.py +22 -1
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +22 -0
snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +11 -0
snowflake/ml/model/_packager/model_env/model_env.py +41 -0
snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -1
snowflake/ml/model/_packager/model_meta/model_meta.py +1 -5
snowflake/ml/model/_packager/model_packager.py +0 -3
snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
snowflake/ml/modeling/_internal/model_trainer.py +7 -0
snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +50 -21
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +24 -2
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +340 -17
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -52
snowflake/ml/modeling/cluster/affinity_propagation.py +51 -52
snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -52
snowflake/ml/modeling/cluster/birch.py +53 -52
snowflake/ml/modeling/cluster/bisecting_k_means.py +53 -52
snowflake/ml/modeling/cluster/dbscan.py +51 -52
snowflake/ml/modeling/cluster/feature_agglomeration.py +53 -52
snowflake/ml/modeling/cluster/k_means.py +53 -52
snowflake/ml/modeling/cluster/mean_shift.py +51 -52
snowflake/ml/modeling/cluster/mini_batch_k_means.py +53 -52
snowflake/ml/modeling/cluster/optics.py +51 -52
snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -52
snowflake/ml/modeling/cluster/spectral_clustering.py +51 -52
snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -52
snowflake/ml/modeling/compose/column_transformer.py +53 -52
snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -52
snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -52
snowflake/ml/modeling/covariance/empirical_covariance.py +51 -52
snowflake/ml/modeling/covariance/graphical_lasso.py +51 -52
snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -52
snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -52
snowflake/ml/modeling/covariance/min_cov_det.py +51 -52
snowflake/ml/modeling/covariance/oas.py +51 -52
snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -52
snowflake/ml/modeling/decomposition/dictionary_learning.py +53 -52
snowflake/ml/modeling/decomposition/factor_analysis.py +53 -52
snowflake/ml/modeling/decomposition/fast_ica.py +53 -52
snowflake/ml/modeling/decomposition/incremental_pca.py +53 -52
snowflake/ml/modeling/decomposition/kernel_pca.py +53 -52
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +53 -52
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +53 -52
snowflake/ml/modeling/decomposition/pca.py +53 -52
snowflake/ml/modeling/decomposition/sparse_pca.py +53 -52
snowflake/ml/modeling/decomposition/truncated_svd.py +53 -52
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +53 -52
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -52
snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -52
snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -52
snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -52
snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -52
snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -52
snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -52
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -52
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -52
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -52
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -52
snowflake/ml/modeling/ensemble/isolation_forest.py +51 -52
snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -52
snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -52
snowflake/ml/modeling/ensemble/stacking_regressor.py +53 -52
snowflake/ml/modeling/ensemble/voting_classifier.py +53 -52
snowflake/ml/modeling/ensemble/voting_regressor.py +53 -52
snowflake/ml/modeling/feature_selection/generic_univariate_select.py +53 -52
snowflake/ml/modeling/feature_selection/select_fdr.py +53 -52
snowflake/ml/modeling/feature_selection/select_fpr.py +53 -52
snowflake/ml/modeling/feature_selection/select_fwe.py +53 -52
snowflake/ml/modeling/feature_selection/select_k_best.py +53 -52
snowflake/ml/modeling/feature_selection/select_percentile.py +53 -52
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +53 -52
snowflake/ml/modeling/feature_selection/variance_threshold.py +53 -52
snowflake/ml/modeling/framework/base.py +64 -36
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -52
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -52
snowflake/ml/modeling/impute/iterative_imputer.py +53 -52
snowflake/ml/modeling/impute/knn_imputer.py +53 -52
snowflake/ml/modeling/impute/missing_indicator.py +53 -52
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +53 -52
snowflake/ml/modeling/kernel_approximation/nystroem.py +53 -52
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +53 -52
snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +53 -52
snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +53 -52
snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -52
snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -52
snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -52
snowflake/ml/modeling/linear_model/ard_regression.py +51 -52
snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -52
snowflake/ml/modeling/linear_model/elastic_net.py +51 -52
snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -52
snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -52
snowflake/ml/modeling/linear_model/huber_regressor.py +51 -52
snowflake/ml/modeling/linear_model/lars.py +51 -52
snowflake/ml/modeling/linear_model/lars_cv.py +51 -52
snowflake/ml/modeling/linear_model/lasso.py +51 -52
snowflake/ml/modeling/linear_model/lasso_cv.py +51 -52
snowflake/ml/modeling/linear_model/lasso_lars.py +51 -52
snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -52
snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -52
snowflake/ml/modeling/linear_model/linear_regression.py +51 -52
snowflake/ml/modeling/linear_model/logistic_regression.py +51 -52
snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -52
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -52
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -52
snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -52
snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -52
snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -52
snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -52
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -52
snowflake/ml/modeling/linear_model/perceptron.py +51 -52
snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -52
snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -52
snowflake/ml/modeling/linear_model/ridge.py +51 -52
snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -52
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -52
snowflake/ml/modeling/linear_model/ridge_cv.py +51 -52
snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -52
snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -52
snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -52
snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -52
snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -52
snowflake/ml/modeling/manifold/isomap.py +53 -52
snowflake/ml/modeling/manifold/mds.py +53 -52
snowflake/ml/modeling/manifold/spectral_embedding.py +53 -52
snowflake/ml/modeling/manifold/tsne.py +53 -52
snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -52
snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -52
snowflake/ml/modeling/model_selection/grid_search_cv.py +21 -23
snowflake/ml/modeling/model_selection/randomized_search_cv.py +38 -20
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -52
snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -52
snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -52
snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -52
snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -52
snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -52
snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -52
snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -52
snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -52
snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -52
snowflake/ml/modeling/neighbors/kernel_density.py +51 -52
snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -52
snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -52
snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -52
snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +53 -52
snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -52
snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -52
snowflake/ml/modeling/neural_network/bernoulli_rbm.py +53 -52
snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -52
snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -52
snowflake/ml/modeling/pipeline/pipeline.py +538 -36
snowflake/ml/modeling/preprocessing/one_hot_encoder.py +12 -0
snowflake/ml/modeling/preprocessing/polynomial_features.py +53 -52
snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -52
snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -52
snowflake/ml/modeling/svm/linear_svc.py +51 -52
snowflake/ml/modeling/svm/linear_svr.py +51 -52
snowflake/ml/modeling/svm/nu_svc.py +51 -52
snowflake/ml/modeling/svm/nu_svr.py +51 -52
snowflake/ml/modeling/svm/svc.py +51 -52
snowflake/ml/modeling/svm/svr.py +51 -52
snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -52
snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -52
snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -52
snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -52
snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -52
snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -52
snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -52
snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -52
snowflake/ml/registry/_manager/model_manager.py +36 -7
snowflake/ml/registry/model_registry.py +3 -149
snowflake/ml/version.py +1 -1
{snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/METADATA +112 -7
{snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/RECORD +216 -206
snowflake/ml/registry/_artifact_manager.py +0 -156
snowflake/ml/registry/artifact.py +0 -46
{snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/LICENSE.txt +0 -0
{snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/WHEEL +0 -0
{snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/top_level.txt +0 -0

snowflake/ml/dataset/dataset_metadata.py ADDED Viewed

@@ -0,0 +1,103 @@
+import dataclasses
+import json
+import typing
+from typing import Any, Dict, List, Optional, Union
+# JSON key under which DatasetMetadata.to_json() records the class name of `properties`,
+# so that DatasetMetadata.from_json() can look up which properties class to rebuild.
+_PROPERTY_TYPE_KEY = "$proptype$"
+# Default (and, since the field is init=False, only) value of DatasetMetadata.schema_version.
+DATASET_SCHEMA_VERSION = "1"
+@dataclasses.dataclass(frozen=True)
+class FeatureStoreMetadata:
+    """
+    Feature store metadata.
+    Properties:
+        spine_query: The input query on source table which will be joined with features.
+        serialized_feature_views: A list of serialized feature objects in the feature store.
+        spine_timestamp_col: Timestamp column which was used for point-in-time correct feature lookup.
+    """
+    spine_query: str
+    serialized_feature_views: List[str]
+    spine_timestamp_col: Optional[str] = None
+    def to_json(self) -> str:
+        """Serialize all fields of this object to a JSON object string."""
+        return json.dumps(dataclasses.asdict(self))
+    @classmethod
+    def from_json(cls, input_json: Union[Dict[str, Any], str, bytes]) -> "FeatureStoreMetadata":
+        """Build an instance from a dict of field values or from a JSON document.
+        Args:
+            input_json: Either an already-parsed dict, whose items are passed as keyword arguments,
+                or a JSON str/bytes document that is parsed first.
+        Returns:
+            A FeatureStoreMetadata constructed from the input's key/value pairs.
+        """
+        if isinstance(input_json, dict):
+            return cls(**input_json)
+        return cls(**json.loads(input_json))
+# Union of every class accepted as DatasetMetadata.properties.
+DatasetPropertiesType = Union[
+    FeatureStoreMetadata,
+]
+# Union[T] gets automatically squashed to T, so default to [T] if get_args() returns empty
+_DatasetPropTypes = typing.get_args(DatasetPropertiesType) or [DatasetPropertiesType]
+# Class-name -> class lookup, used to validate and rebuild `properties` from its serialized type name.
+_DatasetPropTypeDict = {t.__name__: t for t in _DatasetPropTypes}
+@dataclasses.dataclass(frozen=True)
+class DatasetMetadata:
+    """
+    Dataset metadata.
+    Properties:
+        source_query: The query string used to produce the Dataset.
+        owner: The owner of the Dataset.
+        generation_timestamp: The timestamp when this dataset was generated.
+        exclude_cols: Name of column(s) in dataset to be excluded during training/testing.
+            These are typically columns for human inspection such as timestamp or other meta-information.
+            Columns included in `label_cols` do not need to be included here.
+        label_cols: Name of column(s) in dataset that contains labels.
+        properties: Additional metadata properties.
+    """
+    source_query: str
+    owner: str
+    exclude_cols: Optional[List[str]] = None
+    label_cols: Optional[List[str]] = None
+    properties: Optional[DatasetPropertiesType] = None
+    schema_version: str = dataclasses.field(default=DATASET_SCHEMA_VERSION, init=False)
+    def to_json(self) -> str:
+        state_dict = dataclasses.asdict(self)
+        if self.properties:
+            prop_type = type(self.properties).__name__
+            if prop_type not in _DatasetPropTypeDict:
+                raise ValueError(
+                    f"Unsupported `properties` type={prop_type} (supported={','.join(_DatasetPropTypeDict.keys())})"
+                )
+            state_dict[_PROPERTY_TYPE_KEY] = prop_type
+        return json.dumps(state_dict)
+    @classmethod
+    def from_json(cls, input_json: Union[Dict[str, Any], str, bytes]) -> "DatasetMetadata":
+        if not input_json:
+            raise ValueError("json_str was empty or None")
+        try:
+            state_dict: Dict[str, Any] = (
+                input_json if isinstance(input_json, dict) else json.loads(input_json, strict=False)
+            )
+            # TODO: Validate schema version
+            _ = state_dict.pop("schema_version", DATASET_SCHEMA_VERSION)
+            prop_type = state_dict.pop(_PROPERTY_TYPE_KEY, None)
+            prop_values = state_dict.get("properties", {})
+            if prop_type:
+                prop_cls = _DatasetPropTypeDict.get(prop_type, None)
+                if prop_cls is None:
+                    raise TypeError(
+                        f"Unsupported `properties` type={prop_type} (supported={','.join(_DatasetPropTypeDict.keys())})"
+                    )
+                state_dict["properties"] = prop_cls(**prop_values)
+            elif prop_values:
+                raise TypeError(f"`properties` provided but missing `{_PROPERTY_TYPE_KEY}`")
+            return cls(**state_dict)
+        except TypeError as e:
+            raise ValueError("Invalid input schema") from e

snowflake/ml/dataset/dataset_reader.py ADDED Viewed

@@ -0,0 +1,199 @@
+from typing import Any, List
+import pandas as pd
+from pyarrow import parquet as pq
+from snowflake import snowpark
+from snowflake.ml._internal import telemetry
+from snowflake.ml._internal.lineage import data_source, lineage_utils
+from snowflake.ml._internal.utils import import_utils
+from snowflake.ml.fileset import snowfs
+# Project / subproject labels passed to the telemetry decorators on DatasetReader's methods.
+_PROJECT = "Dataset"
+_SUBPROJECT = "DatasetReader"
+# 32 * 2**20 bytes = 32 MiB; DatasetReader uses twice this value as its filesystem block size.
+TARGET_FILE_SIZE = 32 * 2**20  # The max file size for data loading.
+class DatasetReader:
+    """Snowflake Dataset abstraction which provides application integration connectors"""
+    @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
+    def __init__(
+        self,
+        session: snowpark.Session,
+        sources: List[data_source.DataSource],
+    ) -> None:
+        """Initialize a DatasetReader object.
+        Args:
+            session: Snowpark Session to interact with Snowflake backend.
+            sources: Data sources to read from.
+        Raises:
+            ValueError: `sources` arg was empty or null
+        """
+        if not sources:
+            raise ValueError("Invalid input: empty `sources` list not allowed")
+        self._session = session
+        self._sources = sources
+        self._fs: snowfs.SnowFileSystem = snowfs.SnowFileSystem(
+            snowpark_session=self._session,
+            cache_type="bytes",
+            block_size=2 * TARGET_FILE_SIZE,
+        )
+        # Populated on first use by _list_files().
+        self._files: List[str] = []
+    def _list_files(self) -> List[str]:
+        """Private helper function that lists all files across the data sources and caches the results.
+        An empty listing is not cached (an empty list is falsy), so the sources are listed again
+        on the next call in that case.
+        """
+        if self._files:
+            return self._files
+        files: List[str] = []
+        for source in self._sources:
+            # Sort within each source for consistent ordering
+            files.extend(sorted(self._fs.ls(source.url)))  # type: ignore[arg-type]
+        # The combined list is sorted as a whole, so the final order does not depend on source order.
+        files.sort()
+        self._files = files
+        return self._files
+    @property
+    def data_sources(self) -> List[data_source.DataSource]:
+        """The data sources this reader was constructed with."""
+        return self._sources
+    @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
+    def files(self) -> List[str]:
+        """Get the list of remote file paths for the reader's data sources.
+        The file paths follows the snow protocol.
+        Returns:
+            A list of remote file paths
+        Example:
+        >>> dsv.files()
+        ----
+        ["snow://dataset/mydb.myschema.mydataset/versions/test/data_0_0_0.snappy.parquet",
+         "snow://dataset/mydb.myschema.mydataset/versions/test/data_0_0_1.snappy.parquet"]
+        """
+        files = self._list_files()
+        return [self._fs.unstrip_protocol(f) for f in files]
+    @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
+    def filesystem(self) -> snowfs.SnowFileSystem:
+        """Return an fsspec FileSystem which can be used to load the paths returned by `files()`"""
+        return self._fs
+    @telemetry.send_api_usage_telemetry(
+        project=_PROJECT,
+        subproject=_SUBPROJECT,
+        func_params_to_log=["batch_size", "shuffle", "drop_last_batch"],
+    )
+    def to_torch_datapipe(self, *, batch_size: int, shuffle: bool = False, drop_last_batch: bool = True) -> Any:
+        """Transform the Snowflake data into a ready-to-use Pytorch datapipe.
+        Return a Pytorch datapipe which iterates on rows of data.
+        Args:
+            batch_size: It specifies the size of each data batch which will be
+                yielded by the resulting datapipe
+            shuffle: It specifies whether the data will be shuffled. If True, files will be shuffled, and
+                rows in each file will also be shuffled.
+            drop_last_batch: Whether the last batch of data should be dropped. If set to be true,
+                then the last batch will get dropped if its size is smaller than the given batch_size.
+        Returns:
+            A Pytorch iterable datapipe that yields data.
+        Examples:
+        >>> dp = dataset.to_torch_datapipe(batch_size=1)
+        >>> for data in dp:
+        >>>     print(data)
+        ----
+        {'_COL_1':[10]}
+        """
+        # Optional dependencies are resolved at call time rather than at module import.
+        # NOTE(review): import_or_get_dummy presumably returns a placeholder when the package
+        # is not installed -- confirm in import_utils.
+        IterableWrapper, _ = import_utils.import_or_get_dummy("torchdata.datapipes.iter.IterableWrapper")
+        torch_datapipe_module, _ = import_utils.import_or_get_dummy("snowflake.ml.fileset.torch_datapipe")
+        self._fs.optimize_read(self._list_files())
+        input_dp = IterableWrapper(self._list_files())
+        return torch_datapipe_module.ReadAndParseParquet(input_dp, self._fs, batch_size, shuffle, drop_last_batch)
+    @telemetry.send_api_usage_telemetry(
+        project=_PROJECT,
+        subproject=_SUBPROJECT,
+        func_params_to_log=["batch_size", "shuffle", "drop_last_batch"],
+    )
+    def to_tf_dataset(self, *, batch_size: int, shuffle: bool = False, drop_last_batch: bool = True) -> Any:
+        """Transform the Snowflake data into a ready-to-use TensorFlow tf.data.Dataset.
+        Args:
+            batch_size: It specifies the size of each data batch which will be
+                yielded by the resulting dataset
+            shuffle: It specifies whether the data will be shuffled. If True, files will be shuffled, and
+                rows in each file will also be shuffled.
+            drop_last_batch: Whether the last batch of data should be dropped. If set to be true,
+                then the last batch will get dropped if its size is smaller than the given batch_size.
+        Returns:
+            A tf.data.Dataset that yields batched tf.Tensors.
+        Examples:
+        >>> dp = dataset.to_tf_dataset(batch_size=1)
+        >>> for data in dp:
+        >>>     print(data)
+        ----
+        {'_COL_1': <tf.Tensor: shape=(1,), dtype=int64, numpy=[10]>}
+        """
+        # Resolved at call time so that TensorFlow is only needed when this method is used.
+        tf_dataset_module, _ = import_utils.import_or_get_dummy("snowflake.ml.fileset.tf_dataset")
+        self._fs.optimize_read(self._list_files())
+        return tf_dataset_module.read_and_parse_parquet(
+            self._list_files(), self._fs, batch_size, shuffle, drop_last_batch
+        )
+    @telemetry.send_api_usage_telemetry(
+        project=_PROJECT,
+        subproject=_SUBPROJECT,
+        func_params_to_log=["only_feature_cols"],
+    )
+    def to_snowpark_dataframe(self, only_feature_cols: bool = False) -> snowpark.DataFrame:
+        """Convert the reader's data sources to a single Snowpark DataFrame.
+        Args:
+            only_feature_cols: If True, drops each source's `exclude_cols` from the returned DataFrame.
+                The underlying data is unaffected.
+                NOTE(review): label columns are only dropped if the caller put them into
+                `source.exclude_cols` -- confirm where the DataSource objects are built.
+        Returns:
+            A Snowpark dataframe that contains the data of all sources, combined by column name.
+        Note: The dataframe generated by this method might not have the same schema as the original one. Specifically,
+            - NUMBER type with scale != 0 will become float.
+            - Unsupported types (see comments of :func:`Dataset.create_version`) will not have any guarantee.
+                For example, an OBJECT column may be scanned back as a STRING column.
+        """
+        # Only read files whose path matches data_*.parquet under each source URL.
+        file_path_pattern = ".*data_.*[.]parquet"
+        dfs: List[snowpark.DataFrame] = []
+        for source in self._sources:
+            df = self._session.read.option("pattern", file_path_pattern).parquet(source.url)
+            if only_feature_cols and source.exclude_cols:
+                df = df.drop(source.exclude_cols)
+            dfs.append(df)
+        # __init__ rejects an empty `sources` list, so dfs has at least one element here.
+        combined_df = dfs[0]
+        for df in dfs[1:]:
+            combined_df = combined_df.union_all_by_name(df)
+        return lineage_utils.patch_dataframe(combined_df, data_sources=self._sources, inplace=True)
+    @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
+    def to_pandas(self) -> pd.DataFrame:
+        """Retrieve the contents of all data files as a Pandas Dataframe.
+        Returns:
+            The data read from the listed parquet files, or an empty DataFrame when no files are found.
+        """
+        files = self._list_files()
+        if not files:
+            return pd.DataFrame()  # Return empty DataFrame
+        self._fs.optimize_read(files)
+        pd_ds = pq.ParquetDataset(files, filesystem=self._fs)
+        return pd_ds.read_pandas().to_pandas()

snowflake/ml/feature_store/__init__.py CHANGED Viewed

@@ -2,8 +2,14 @@ import os
 from snowflake.ml._internal import init_utils
+from .access_manager import setup_feature_store
 pkg_dir = os.path.dirname(os.path.abspath(__file__))
 pkg_name = __name__
+# NOTE(review): presumably gathers the classes defined by the modules in this package
+# directory as a name -> class mapping -- confirm in init_utils.
 exportable_classes = init_utils.fetch_classes_from_modules_in_pkg_dir(pkg_dir=pkg_dir, pkg_name=pkg_name)
+# Expose every fetched class as a module-level attribute of this package.
 for k, v in exportable_classes.items():
     globals()[k] = v
+# Public API: the dynamically exported classes plus the explicitly imported setup_feature_store.
+__all__ = list(exportable_classes.keys()) + [
+    "setup_feature_store",
+]

snowflake/ml/feature_store/access_manager.py ADDED Viewed

@@ -0,0 +1,279 @@
+from dataclasses import asdict, dataclass
+from enum import Enum
+from typing import Dict, List, Optional
+from warnings import warn
+from snowflake.ml._internal import telemetry
+from snowflake.ml._internal.utils.query_result_checker import SqlResultValidator
+from snowflake.ml._internal.utils.sql_identifier import SqlIdentifier
+from snowflake.ml.feature_store.feature_store import (
+    _FEATURE_STORE_OBJECT_TAG,
+    _FEATURE_VIEW_METADATA_TAG,
+    CreationMode,
+    FeatureStore,
+)
+from snowflake.snowpark import Session, exceptions
+# Project name passed to the telemetry decorator on setup_feature_store
+_PROJECT = "FeatureStore"
+# When used as a _Privilege object name, _grant_privileges issues one grant on FUTURE
+# and one on ALL objects of that type
+_ALL_OBJECTS = "@ALL_OBJECTS"  # Special flag to mark "all+future" grants
+class _FeatureStoreRole(Enum):
+    """Role kinds used as keys of the `_PRE_INIT_PRIVILEGES` and `_POST_INIT_PRIVILEGES` tables."""
+    NONE = 0  # For testing purposes
+    CONSUMER = 1
+    PRODUCER = 2
+@dataclass(frozen=True)
+class _Privilege:
+    """Immutable description of one set of privileges that `_grant_privileges` turns into GRANT statements."""
+    # Snowflake object type, e.g. "SCHEMA" or "DYNAMIC TABLE"
+    object_type: str
+    # Object name template formatted with the _SessionInfo fields ({database}, {schema},
+    # {warehouse}), or _ALL_OBJECTS to grant on both FUTURE and ALL objects of this type
+    object_name: str
+    # Privileges joined with "," into a single GRANT statement
+    privileges: List[str]
+    # Optional template for the "IN ..." clause, formatted with the same fields
+    scope: Optional[str] = None
+@dataclass(frozen=True)
+class _SessionInfo:
+    """Identifiers substituted into the `_Privilege` object name and scope templates."""
+    database: SqlIdentifier
+    schema: SqlIdentifier
+    warehouse: SqlIdentifier
+# Privileges granted before FeatureStore.__init__() runs, keyed by role kind.
+# Each entry is a _Privilege(object_type, object_name, [PRIVILEGES, ...][, scope]).
+_PRE_INIT_PRIVILEGES: Dict[_FeatureStoreRole, List[_Privilege]] = {
+    _FeatureStoreRole.PRODUCER: [
+        _Privilege(
+            "SCHEMA",
+            "{database}.{schema}",
+            [
+                "CREATE DYNAMIC TABLE",
+                "CREATE TAG",
+                "CREATE VIEW",
+                "CREATE TASK",
+                "CREATE TABLE",
+            ],
+        ),
+        _Privilege(
+            "SCHEMA",
+            "{database}.{schema}",
+            [
+                "CREATE DATASET",  # Handle DATASET privilege separately since it may not be enabled
+            ],
+        ),
+        _Privilege("DYNAMIC TABLE", _ALL_OBJECTS, ["OPERATE"], "SCHEMA {database}.{schema}"),
+        _Privilege("TASK", _ALL_OBJECTS, ["OPERATE"], "SCHEMA {database}.{schema}"),
+    ],
+    _FeatureStoreRole.CONSUMER: [
+        _Privilege("DATABASE", "{database}", ["USAGE"]),
+        _Privilege("SCHEMA", "{database}.{schema}", ["USAGE"]),
+        _Privilege("DYNAMIC TABLE", _ALL_OBJECTS, ["SELECT", "MONITOR"], "SCHEMA {database}.{schema}"),
+        _Privilege("VIEW", _ALL_OBJECTS, ["SELECT", "REFERENCES"], "SCHEMA {database}.{schema}"),
+        _Privilege("TABLE", _ALL_OBJECTS, ["SELECT", "REFERENCES"], "SCHEMA {database}.{schema}"),
+        # FIXME(dhung): FUTURE DATASETS not supported until 8.19
+        # _Privilege("DATASET", _ALL_OBJECTS, ["USAGE"], "SCHEMA {database}.{schema}"),
+        # User should decide whether they want to grant warehouse usage to CONSUMER
+        # _Privilege("WAREHOUSE", "{warehouse}", ["USAGE"]),
+    ],
+    _FeatureStoreRole.NONE: [],
+}
+# Privileges granted after FeatureStore.__init__() has run, keyed by role kind.
+# Only PRODUCER receives any: APPLY on the two Feature Store tags in the target schema.
+_POST_INIT_PRIVILEGES: Dict[_FeatureStoreRole, List[_Privilege]] = {
+    _FeatureStoreRole.PRODUCER: [
+        _Privilege("TAG", f"{{database}}.{{schema}}.{_FEATURE_VIEW_METADATA_TAG}", ["APPLY"]),
+        _Privilege("TAG", f"{{database}}.{{schema}}.{_FEATURE_STORE_OBJECT_TAG}", ["APPLY"]),
+    ],
+    _FeatureStoreRole.CONSUMER: [],
+    _FeatureStoreRole.NONE: [],
+}
+def _grant_privileges(
+    session: Session, role_name: str, privileges: List[_Privilege], session_info: _SessionInfo
+) -> None:
+    """
+    Grant every entry of `privileges` to `role_name`.
+    An entry whose object name is `_ALL_OBJECTS` produces two grants, one on FUTURE and one on
+    ALL objects of the pluralized object type; any other entry produces a single grant on the
+    object name formatted with the fields of `session_info`.
+    Args:
+        session: Snowpark Session to interact with Snowflake backend.
+        role_name: Role receiving the grants.
+        privileges: Privilege descriptions to grant.
+        session_info: Values substituted into the object name and scope templates.
+    Raises:
+        exceptions.SnowparkSQLException: A grant failed and its message contains none of the
+            tolerated markers. Tolerated failures are reported as a UserWarning instead.
+    """
+    format_args = asdict(session_info)
+    for privilege in privileges:
+        if privilege.object_name != _ALL_OBJECTS:
+            targets = [f"{privilege.object_type} {privilege.object_name.format(**format_args)}"]
+        else:
+            # Ensure obj is plural
+            plural = privilege.object_type.upper()
+            plural = plural if plural.endswith("S") else plural + "S"
+            targets = [f"FUTURE {plural}", f"ALL {plural}"]
+        try:
+            for target in targets:
+                statement = f"GRANT {','.join(privilege.privileges)} ON {target}"
+                if privilege.scope:
+                    statement += f" IN {privilege.scope.format(**format_args)}"
+                statement += f" TO ROLE {role_name}"
+                session.sql(statement).collect()
+        except exceptions.SnowparkSQLException as e:
+            # Messages containing any of these markers are downgraded to a warning
+            tolerated_markers = ("Ask your account admin", "Object type or Class", privilege.object_type)
+            if not any(marker in e.message for marker in tolerated_markers):
+                raise
+            warn(
+                f"Failed to grant privilege for {privilege.object_type}: {e.message}",
+                UserWarning,
+                stacklevel=1,
+            )
+def _configure_pre_init_privileges(
+    session: Session,
+    session_info: _SessionInfo,
+    producer_role: str = "SNOWML_FEATURE_STORE_PRODUCER_RL",
+    consumer_role: str = "SNOWML_FEATURE_STORE_CONSUMER_RL",
+) -> None:
+    """
+    Create the Feature Store schema if needed and grant the pre-init privileges to both roles.
+    Must be run with ACCOUNTADMIN or a role with `MANAGE GRANTS` privilege.
+    See https://docs.snowflake.com/en/sql-reference/sql/grant-privilege for more information
+    about privilege grants in Snowflake.
+    Args:
+        session: Snowpark Session to interact with Snowflake backend.
+        session_info: Session info like database and schema for the FeatureStore instance.
+        producer_role: Name of producer role to be configured.
+        consumer_role: Name of consumer role to be configured.
+    """
+    qualified_schema = f"{session_info.database}.{session_info.schema}"
+    # Create schema if not already exists; exactly one row with a "status" column is expected
+    validator = SqlResultValidator(session, f"CREATE SCHEMA IF NOT EXISTS {qualified_schema}")
+    (status_row,) = validator.has_dimensions(expected_rows=1).has_column("status").validate()
+    # Pass schema ownership from admin to PRODUCER, but only for a schema created by this call
+    if status_row["status"].endswith("successfully created."):
+        session.sql(f"GRANT OWNERSHIP ON SCHEMA {qualified_schema} TO ROLE {producer_role}").collect()
+    # Grant privileges to roles, PRODUCER first
+    role_kinds = (
+        (producer_role, _FeatureStoreRole.PRODUCER),
+        (consumer_role, _FeatureStoreRole.CONSUMER),
+    )
+    for role_name, role_kind in role_kinds:
+        _grant_privileges(session, role_name, _PRE_INIT_PRIVILEGES[role_kind], session_info)
+def _configure_post_init_privileges(
+    session: Session,
+    session_info: _SessionInfo,
+    producer_role: str = "FS_PRODUCER",
+    consumer_role: str = "FS_CONSUMER",
+) -> None:
+    """
+    Grant the privileges listed in `_POST_INIT_PRIVILEGES` to the producer and consumer roles.
+    Args:
+        session: Snowpark Session to interact with Snowflake backend.
+        session_info: Session info like database and schema for the FeatureStore instance.
+        producer_role: Name of producer role to be configured.
+        consumer_role: Name of consumer role to be configured.
+    """
+    role_kinds = (
+        (producer_role, _FeatureStoreRole.PRODUCER),
+        (consumer_role, _FeatureStoreRole.CONSUMER),
+    )
+    for role_name, role_kind in role_kinds:
+        _grant_privileges(session, role_name, _POST_INIT_PRIVILEGES[role_kind], session_info)
+def _configure_role_hierarchy(
+    session: Session,
+    producer_role: str,
+    consumer_role: str,
+) -> None:
+    """
+    Create Feature Store roles and configure the role hierarchy. Must be run with
+    ACCOUNTADMIN or a role with `CREATE ROLE` privilege.
+    See https://docs.snowflake.com/en/sql-reference/sql/grant-privilege for more information
+    about privilege grants in Snowflake.
+    The consumer role is granted to the producer role, and the producer role is granted to
+    SYSADMIN and to the session's current role (when the session reports one).
+    Args:
+        session: Snowpark Session to interact with Snowflake backend.
+        producer_role: Name of producer role to be configured.
+        consumer_role: Name of consumer role to be configured.
+    """
+    # Normalize into new names instead of rebinding the `str` parameters to another type
+    producer = SqlIdentifier(producer_role)
+    consumer = SqlIdentifier(consumer_role)
+    # Create the necessary roles
+    for role in (producer, consumer):
+        session.sql(f"CREATE ROLE IF NOT EXISTS {role}").collect()
+    # Build role hierarchy
+    session.sql(f"GRANT ROLE {consumer} TO ROLE {producer}").collect()
+    session.sql(f"GRANT ROLE {producer} TO ROLE SYSADMIN").collect()
+    # get_current_role() is Optional; skip the grant rather than emit "TO ROLE None"
+    current_role = session.get_current_role()
+    if current_role is not None:
+        session.sql(f"GRANT ROLE {producer} TO ROLE {current_role}").collect()
+@telemetry.send_api_usage_telemetry(project=_PROJECT)
+def setup_feature_store(
+    session: Session,
+    database: str,
+    schema: str,
+    warehouse: str,
+    producer_role: str = "FS_PRODUCER",
+    consumer_role: str = "FS_CONSUMER",
+) -> FeatureStore:
+    """
+    Sets up a new Feature Store including role/privilege setup. Must be run with ACCOUNTADMIN
+    or a role with `MANAGE GRANTS` and `CREATE ROLE` privileges.
+    See https://docs.snowflake.com/en/sql-reference/sql/grant-privilege for more information
+    about privilege grants in Snowflake.
+    The FeatureStore itself is created while the session uses the producer role; the role that
+    was current beforehand is restored afterwards, even if creation fails.
+    Args:
+        session: Snowpark Session to interact with Snowflake backend.
+        database: Database to create the FeatureStore instance.
+        schema: Schema to create the FeatureStore instance.
+        warehouse: Default warehouse for Feature Store compute.
+        producer_role: Name of producer role to be configured.
+        consumer_role: Name of consumer role to be configured.
+    Returns:
+        Feature Store instance.
+    Raises:
+        exceptions.SnowparkSQLException: Insufficient privileges. A failure while configuring
+            the role hierarchy is re-raised unless both roles already exist and the consumer
+            role is already granted to the producer role.
+    """
+    # Normalize the user-supplied names into SQL identifiers
+    database = SqlIdentifier(database)
+    schema = SqlIdentifier(schema)
+    warehouse = SqlIdentifier(warehouse)
+    # NOTE(review): the three values are already SqlIdentifier at this point, so wrapping them
+    # again looks redundant - confirm SqlIdentifier is idempotent before simplifying
+    session_info = _SessionInfo(
+        SqlIdentifier(database),
+        SqlIdentifier(schema),
+        SqlIdentifier(warehouse),
+    )
+    try:
+        _configure_role_hierarchy(session, producer_role=producer_role, consumer_role=consumer_role)
+    except exceptions.SnowparkSQLException:
+        # Error can be safely ignored if roles already exist and hierarchy is already built
+        # NOTE(review): the checks below use the raw role strings, whereas
+        # _configure_role_hierarchy wraps them in SqlIdentifier - verify the behavior for role
+        # names that the normalization would alter (e.g. lower-case input)
+        for role in (producer_role, consumer_role):
+            # Ensure roles already exist
+            if session.sql(f"SHOW ROLES LIKE '{role}' STARTS WITH '{role}'").count() == 0:
+                raise
+        # Ensure hierarchy already configured
+        consumer_grants = session.sql(f"SHOW GRANTS ON ROLE {consumer_role}").collect()
+        if not any(r["granted_to"] == "ROLE" and r["grantee_name"] == producer_role for r in consumer_grants):
+            raise
+    # Do any pre-FeatureStore.__init__() privilege setup
+    _configure_pre_init_privileges(session, session_info, producer_role, consumer_role)
+    # Use PRODUCER role to create and operate new Feature Store
+    current_role = session.get_current_role()
+    assert current_role is not None  # to make mypy happy
+    try:
+        session.use_role(producer_role)
+        fs = FeatureStore(session, database, schema, warehouse, creation_mode=CreationMode.CREATE_IF_NOT_EXIST)
+    finally:
+        # Always switch back to the role that was active before the setup
+        session.use_role(current_role)
+    # Do any post-FeatureStore.__init__() privilege setup
+    _configure_post_init_privileges(session, session_info, producer_role, consumer_role)
+    return fs

snowflake-ml-python 1.4.1__py3-none-any.whl → 1.5.1__py3-none-any.whl

snowflake-ml-python 1.4.1py3-none-any.whl → 1.5.1py3-none-any.whl