wedata-feature-engineering 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wedata/__init__.py +1 -1
- wedata/feature_store/client.py +113 -41
- wedata/feature_store/constants/constants.py +19 -0
- wedata/feature_store/entities/column_info.py +4 -4
- wedata/feature_store/entities/feature_lookup.py +5 -1
- wedata/feature_store/entities/feature_spec.py +46 -46
- wedata/feature_store/entities/feature_table.py +42 -99
- wedata/feature_store/entities/training_set.py +13 -12
- wedata/feature_store/feature_table_client/feature_table_client.py +85 -30
- wedata/feature_store/spark_client/spark_client.py +30 -56
- wedata/feature_store/training_set_client/training_set_client.py +209 -38
- wedata/feature_store/utils/common_utils.py +213 -3
- wedata/feature_store/utils/feature_lookup_utils.py +6 -6
- wedata/feature_store/utils/feature_spec_utils.py +6 -6
- wedata/feature_store/utils/feature_utils.py +5 -5
- wedata/feature_store/utils/on_demand_utils.py +107 -0
- wedata/feature_store/utils/schema_utils.py +1 -1
- wedata/feature_store/utils/signature_utils.py +205 -0
- wedata/feature_store/utils/training_set_utils.py +18 -19
- wedata/feature_store/utils/uc_utils.py +1 -1
- {wedata_feature_engineering-0.1.5.dist-info → wedata_feature_engineering-0.1.6.dist-info}/METADATA +1 -1
- wedata_feature_engineering-0.1.6.dist-info/RECORD +43 -0
- feature_store/__init__.py +0 -6
- feature_store/client.py +0 -169
- feature_store/constants/__init__.py +0 -0
- feature_store/constants/constants.py +0 -28
- feature_store/entities/__init__.py +0 -0
- feature_store/entities/column_info.py +0 -117
- feature_store/entities/data_type.py +0 -92
- feature_store/entities/environment_variables.py +0 -55
- feature_store/entities/feature.py +0 -53
- feature_store/entities/feature_column_info.py +0 -64
- feature_store/entities/feature_function.py +0 -55
- feature_store/entities/feature_lookup.py +0 -179
- feature_store/entities/feature_spec.py +0 -454
- feature_store/entities/feature_spec_constants.py +0 -25
- feature_store/entities/feature_table.py +0 -164
- feature_store/entities/feature_table_info.py +0 -40
- feature_store/entities/function_info.py +0 -184
- feature_store/entities/on_demand_column_info.py +0 -44
- feature_store/entities/source_data_column_info.py +0 -21
- feature_store/entities/training_set.py +0 -134
- feature_store/feature_table_client/__init__.py +0 -0
- feature_store/feature_table_client/feature_table_client.py +0 -313
- feature_store/spark_client/__init__.py +0 -0
- feature_store/spark_client/spark_client.py +0 -286
- feature_store/training_set_client/__init__.py +0 -0
- feature_store/training_set_client/training_set_client.py +0 -196
- feature_store/utils/__init__.py +0 -0
- feature_store/utils/common_utils.py +0 -96
- feature_store/utils/feature_lookup_utils.py +0 -570
- feature_store/utils/feature_spec_utils.py +0 -286
- feature_store/utils/feature_utils.py +0 -73
- feature_store/utils/schema_utils.py +0 -117
- feature_store/utils/topological_sort.py +0 -158
- feature_store/utils/training_set_utils.py +0 -580
- feature_store/utils/uc_utils.py +0 -281
- feature_store/utils/utils.py +0 -252
- feature_store/utils/validation_utils.py +0 -55
- wedata/feature_store/utils/utils.py +0 -252
- wedata_feature_engineering-0.1.5.dist-info/RECORD +0 -79
- {wedata_feature_engineering-0.1.5.dist-info → wedata_feature_engineering-0.1.6.dist-info}/WHEEL +0 -0
- {wedata_feature_engineering-0.1.5.dist-info → wedata_feature_engineering-0.1.6.dist-info}/top_level.txt +0 -0
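Functionally, 0.1.6 is a namespace consolidation: the duplicate top-level `feature_store` package shipped in 0.1.5 is deleted, everything now lives under `wedata.feature_store`, and two modules (`on_demand_utils.py`, `signature_utils.py`) are new. For downstream code the visible change is one extra path segment on every import, e.g.:

```python
# 0.1.5 also shipped a duplicate top-level package (deleted in 0.1.6):
# from feature_store.entities.feature_lookup import FeatureLookup

# 0.1.6 keeps only the namespaced package:
from wedata.feature_store.entities.feature_lookup import FeatureLookup
```

The hunks below show this rename inside `wedata/feature_store/utils/training_set_utils.py` and `wedata/feature_store/utils/uc_utils.py`, followed by the new wheel RECORD and the deleted duplicate modules.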
wedata/feature_store/utils/training_set_utils.py

```diff
@@ -4,21 +4,19 @@ from typing import Dict, List, Optional, Set
 
 from pyspark.sql import DataFrame
 
-from feature_store.entities.column_info import ColumnInfo
-from feature_store.entities.feature import Feature
-from feature_store.entities.feature_column_info import FeatureColumnInfo
-from feature_store.entities.feature_lookup import FeatureLookup
-from feature_store.entities.feature_spec import FeatureSpec
-from feature_store.entities.feature_table import FeatureTable
-from feature_store.entities.feature_table_info import FeatureTableInfo
-from feature_store.entities.function_info import FunctionInfo
-from feature_store.entities.on_demand_column_info import OnDemandColumnInfo
-from feature_store.entities.source_data_column_info import SourceDataColumnInfo
-
-from feature_store.
-
-from feature_store.utils import common_utils, validation_utils, uc_utils, schema_utils, utils
-from feature_store.utils.feature_spec_utils import assign_topological_ordering
+from wedata.feature_store.entities.column_info import ColumnInfo
+from wedata.feature_store.entities.feature import Feature
+from wedata.feature_store.entities.feature_column_info import FeatureColumnInfo
+from wedata.feature_store.entities.feature_lookup import FeatureLookup
+from wedata.feature_store.entities.feature_spec import FeatureSpec
+from wedata.feature_store.entities.feature_table import FeatureTable
+from wedata.feature_store.entities.feature_table_info import FeatureTableInfo
+from wedata.feature_store.entities.function_info import FunctionInfo
+from wedata.feature_store.entities.on_demand_column_info import OnDemandColumnInfo
+from wedata.feature_store.entities.source_data_column_info import SourceDataColumnInfo
+
+from wedata.feature_store.utils import common_utils, validation_utils, uc_utils, schema_utils
+from wedata.feature_store.utils.feature_spec_utils import assign_topological_ordering
 
 _logger = logging.getLogger(__name__)
 
```
```diff
@@ -99,9 +97,9 @@ def _explode_feature_lookup(
         FeatureColumnInfo(
             table_name=feature_lookup.table_name,
             feature_name=feature_name,
-            lookup_key=
+            lookup_key=common_utils.as_list(feature_lookup.lookup_key),
             output_name=(feature_lookup._get_output_name(feature_name)),
-            timestamp_lookup_key=
+            timestamp_lookup_key=common_utils.as_list(
                 feature_lookup.timestamp_lookup_key, default=[]
             ),
         )
```
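Both replaced call sites now normalize scalar-or-list arguments through `common_utils.as_list`. Its body is not shown in this diff; a minimal sketch consistent with the call sites, assuming scalar-wrapping semantics with an optional default for `None`:

```python
from typing import Any, List, Optional

def as_list(obj: Any, default: Optional[List] = None) -> Optional[List]:
    """Hypothetical sketch: normalize a scalar-or-list argument to a list.

    Semantics inferred from the call sites above; the real
    wedata.feature_store.utils.common_utils.as_list may differ.
    """
    if obj is None:
        return default
    if isinstance(obj, list):
        return obj
    return [obj]

# Call-site behavior implied by the hunk:
assert as_list("user_id") == ["user_id"]
assert as_list(["user_id", "ts"]) == ["user_id", "ts"]
assert as_list(None, default=[]) == []
```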
```diff
@@ -280,13 +278,14 @@ def get_table_metadata(
     feature_table_metadata_map = get_feature_table_metadata_for_tables(
         spark_client, table_names=table_names
     )
+
     feature_table_data_map = load_feature_data_for_tables(
         spark_client, table_names=table_names
     )
     return _FeatureTableMetadata(
         feature_table_features_map,
         feature_table_metadata_map,
-        feature_table_data_map
+        feature_table_data_map
     )
 
 
```
```diff
@@ -515,7 +514,7 @@ def build_feature_spec(
         for table_name in consumed_table_names
     ]
     function_infos = [
-        FunctionInfo(
+        FunctionInfo(full_name=udf_name) for udf_name in consumed_udf_names
     ]
 
     # Build FeatureSpec
```
wedata/feature_store/utils/uc_utils.py

```diff
@@ -2,7 +2,7 @@ import copy
 import re
 from typing import Optional, Set
 
-from feature_store.entities.feature_spec import FeatureSpec
+from wedata.feature_store.entities.feature_spec import FeatureSpec
 
 SINGLE_LEVEL_NAMESPACE_REGEX = r"^[^\. \/\x00-\x1F\x7F]+$"
 TWO_LEVEL_NAMESPACE_REGEX = r"^[^\. \/\x00-\x1F\x7F]+(\.[^\. \/\x00-\x1F\x7F]+)$"
```
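The two regexes constrain table identifiers: a single name component containing no dots, spaces, slashes, or control characters, or exactly two such components joined by a dot. For instance:

```python
import re

# Patterns copied verbatim from the uc_utils.py hunk above.
SINGLE_LEVEL_NAMESPACE_REGEX = r"^[^\. \/\x00-\x1F\x7F]+$"
TWO_LEVEL_NAMESPACE_REGEX = r"^[^\. \/\x00-\x1F\x7F]+(\.[^\. \/\x00-\x1F\x7F]+)$"

assert re.match(SINGLE_LEVEL_NAMESPACE_REGEX, "my_table")
assert not re.match(SINGLE_LEVEL_NAMESPACE_REGEX, "db.my_table")
assert re.match(TWO_LEVEL_NAMESPACE_REGEX, "db.my_table")
assert not re.match(TWO_LEVEL_NAMESPACE_REGEX, "catalog.db.my_table")
```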
wedata_feature_engineering-0.1.6.dist-info/RECORD

```diff
@@ -0,0 +1,43 @@
+wedata/__init__.py,sha256=26GwucASB9KsmU109sN-VKotEKp1WZYQDGP0wgWZrzY,101
+wedata/feature_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wedata/feature_store/client.py,sha256=7a-9C8HIBHnQNQD6I4W3UtBQwkJE8G-Q7N24zydjpkY,8100
+wedata/feature_store/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wedata/feature_store/constants/constants.py,sha256=b4tgcSt66YIq0Fg7pMbqvbqPOI77Cz8znLVZ4ihUKss,1479
+wedata/feature_store/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wedata/feature_store/entities/column_info.py,sha256=sU_WD9U0qse0M2speowLY30qSE6j4_57PuvtdPkwiFY,4192
+wedata/feature_store/entities/data_type.py,sha256=VpHS6Fr3TphQQ8NbAcEnDJ-8eOZV6ivYuWxv3pAM2RM,3394
+wedata/feature_store/entities/environment_variables.py,sha256=ZEFml5H9MQuzBKM074mUrFYu-Sga4Knmxqiwpke2WGc,1679
+wedata/feature_store/entities/feature.py,sha256=wX8fTBlJq3GYdj9rrBDCY3kFgcVBBAiOOZdxEhnQkNQ,1241
+wedata/feature_store/entities/feature_column_info.py,sha256=-TGxRafYUaNKe0YzHus2XbfRaVrMv7pcffMdbtTT4nA,2031
+wedata/feature_store/entities/feature_function.py,sha256=R17INrCE-U_Uj9KLbFz69aYlOkTETTwQHMMo470F4lQ,1865
+wedata/feature_store/entities/feature_lookup.py,sha256=YjYz8kLq42doFbgPzpmm1r3GPhPYkLsIss4H71x-KAo,8009
+wedata/feature_store/entities/feature_spec.py,sha256=60RUOOe9y_Xsd1I3xqq4NZYnaox4_jjwSyGRTKXLiIw,20041
+wedata/feature_store/entities/feature_spec_constants.py,sha256=YWDBfRiNDe6fUJFUBo3V4WYg2xsljoPAE-ZejfFZCgM,785
+wedata/feature_store/entities/feature_table.py,sha256=dHZHSDPD4HJ2XanLVIrVTkaCYUeqZ6eWEpA0d3YO71g,4010
+wedata/feature_store/entities/feature_table_info.py,sha256=2vUaVdW_jw1dRAlmJWvBRueuMeuqWu_NYB9SlxLI7Uw,1126
+wedata/feature_store/entities/function_info.py,sha256=l0kmiq2R_QNfSMJ7y0xZohlMiemgYSr1dN5vzV8ijIs,7314
+wedata/feature_store/entities/on_demand_column_info.py,sha256=Eh5ieaj1TxC7DG6ipBZzH2ZyY0bwkLrDOkuZjgYr4gY,1297
+wedata/feature_store/entities/source_data_column_info.py,sha256=a9jQOJvehwDIrKPwsP6W9YRBSPNK2nZYypE6-p80CwA,542
+wedata/feature_store/entities/training_set.py,sha256=ylt1h6Z_xU8hKYvnvd80CeewTGSN68-_kvFpoliwH7s,5679
+wedata/feature_store/feature_table_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wedata/feature_store/feature_table_client/feature_table_client.py,sha256=nrnY3FLQnMhW1BzByDjjfU89hirgaKlg2l2tAfcjvyM,12138
+wedata/feature_store/spark_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wedata/feature_store/spark_client/spark_client.py,sha256=DBCYjLsFrIVRvLErTNyfLIHRul3v0y9uZIY2JR1N92s,10323
+wedata/feature_store/training_set_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wedata/feature_store/training_set_client/training_set_client.py,sha256=gHeZU0rvvUcyNTfroXD3LAinFPdhDpnwTOIWj6z84Tc,15102
+wedata/feature_store/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wedata/feature_store/utils/common_utils.py,sha256=rmGXfBoEyDMlfqd7lxpEmKJaLoQ-d-ufWpAcE8nSHqA,10009
+wedata/feature_store/utils/feature_lookup_utils.py,sha256=da6ULwf5D-FRVpZoNyag1rroBfq_XPSH4a3uEMB_8io,22372
+wedata/feature_store/utils/feature_spec_utils.py,sha256=GFwc-WT6nN1tnal5F2c0bgVDRhH-YW58S0GaHBPZEsQ,11624
+wedata/feature_store/utils/feature_utils.py,sha256=KKq28bVB_lCuhnR9Hk6JegJBOVgcelWlvrRM-F9onkA,2796
+wedata/feature_store/utils/on_demand_utils.py,sha256=pazZRG5c0Se08MV_inBddIeX4Q9xlVN_H9SC_WK3xzs,4260
+wedata/feature_store/utils/schema_utils.py,sha256=y6EYY1pUxjVg6MP4C7avdW8ZEBBaDo1YTV2CmPF4i8o,4491
+wedata/feature_store/utils/signature_utils.py,sha256=_4_mo1Qlzklp-JrISMS3Jv89MPbaH6rz_cRDvJqFNXM,7957
+wedata/feature_store/utils/topological_sort.py,sha256=ebzKxmxeCLk9seB1zR0ASCGXsZsa-DjxJeTc4KUadtg,6475
+wedata/feature_store/utils/training_set_utils.py,sha256=MYsPZS1d9HKswHgjgxD8K7H9N3dWPyyTTx20Mkp4PVU,22497
+wedata/feature_store/utils/uc_utils.py,sha256=A-W8Cd8yvTmAMEWaHeWmGmcIDMvUtjAfx2G2x_di1QE,10774
+wedata/feature_store/utils/validation_utils.py,sha256=FslvrNs3kstqvM6THScLOluEE6O9RWlDrD9xiihTzlw,1735
+wedata_feature_engineering-0.1.6.dist-info/METADATA,sha256=orxNq_A9F8FcSWYn6wTY1pQ2KtqNVIREvGziUnNa1ys,493
+wedata_feature_engineering-0.1.6.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+wedata_feature_engineering-0.1.6.dist-info/top_level.txt,sha256=Xa0v1rh__RvfVTVDirW5r5UBKg7ZO_iuTeXfp8MNo2A,7
+wedata_feature_engineering-0.1.6.dist-info/RECORD,,
```
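`RECORD` is the standard wheel manifest: one CSV row per installed file with its path, urlsafe-base64 `sha256` digest, and size in bytes (`RECORD` itself is listed with empty digest and size, since it cannot hash itself). A minimal reader sketch over two rows from the listing above:

```python
import csv
import io

record = """wedata/__init__.py,sha256=26GwucASB9KsmU109sN-VKotEKp1WZYQDGP0wgWZrzY,101
wedata_feature_engineering-0.1.6.dist-info/RECORD,,"""

for path, digest, size in csv.reader(io.StringIO(record)):
    print(path, digest or "<none>", size or "<none>")
```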
feature_store/__init__.py DELETED
feature_store/client.py DELETED
```diff
@@ -1,169 +0,0 @@
-"""
-Python implementation of the Wedata FeatureStoreClient
-"""
-
-from __future__ import annotations
-from typing import Union, List, Dict, Optional, Any
-from pyspark.sql import DataFrame, SparkSession
-from pyspark.sql.streaming import StreamingQuery
-from pyspark.sql.types import StructType
-
-from feature_store.constants.constants import APPEND, DEFAULT_WRITE_STREAM_TRIGGER
-from feature_store.entities.feature_function import FeatureFunction
-from feature_store.entities.feature_lookup import FeatureLookup
-from feature_store.entities.training_set import TrainingSet
-from feature_store.feature_table_client.feature_table_client import FeatureTableClient
-from feature_store.spark_client.spark_client import SparkClient
-from feature_store.training_set_client.training_set_client import TrainingSetClient
-from feature_store.utils.feature_utils import format_feature_lookups_and_functions
-
-
-class FeatureStoreClient:
-    """Unified feature store client providing full feature lifecycle management."""
-
-    def __init__(self, spark: SparkSession):
-        """
-        :param spark: an initialized SparkSession
-        """
-        self._spark = spark
-        self._spark_client = SparkClient(spark)
-        self._feature_table_client = FeatureTableClient(spark)
-
-    def create_table(
-        self,
-        name: str,
-        primary_keys: Union[str, List[str]],
-        df: Optional[DataFrame] = None,
-        *,
-        timestamp_keys: Union[str, List[str], None] = None,
-        partition_columns: Union[str, List[str], None] = None,
-        schema: Optional[StructType] = None,
-        description: Optional[str] = None,
-        tags: Optional[Dict[str, str]] = None
-    ):
-        """
-        Create a feature table (supports batch and streaming writes).
-
-        Args:
-            name: full feature table name (format: <table>)
-            primary_keys: primary key column(s); composite keys supported
-            df: initial data (optional, used to infer the schema)
-            timestamp_keys: timestamp key(s), for temporal features
-            partition_columns: partition column(s), to optimize storage and queries
-            description: business description
-            tags: business tags
-
-        Returns:
-            A FeatureTable instance.
-
-        Raises:
-            ValueError: if the schema does not match the data.
-        """
-
-        return self._feature_table_client.create_table(
-            name=name,
-            primary_keys=primary_keys,
-            df=df,
-            timestamp_keys=timestamp_keys,
-            partition_columns=partition_columns,
-            schema=schema,
-            description=description,
-            tags=tags
-        )
-
-
-    def read_table(self, name: str) -> DataFrame:
-        """
-        Read data from a feature table.
-
-        Args:
-            name: feature table name
-
-        Returns:
-            DataFrame: a DataFrame containing the feature table data.
-        """
-        return self._feature_table_client.read_table(name)
-
-
-    def drop_table(self, name: str) -> None:
-        """
-        Drop a feature table.
-
-        Args:
-            name: name of the feature table to drop
-
-        Returns:
-            None
-        """
-        return self._feature_table_client.drop_table(name)
-
-
-    def create_training_set(
-        self,
-        df: DataFrame,
-        feature_lookups: List[Union[FeatureLookup, FeatureFunction]],
-        label: Union[str, List[str], None],
-        exclude_columns: Optional[List[str]] = None,
-        **kwargs,
-    ) -> TrainingSet:
-
-        """
-        Create a training set.
-
-        Args:
-            df: base data
-            feature_lookups: list of feature lookups
-            label: label column name(s)
-            exclude_columns: columns to exclude
-
-        Returns:
-            A TrainingSet instance.
-        """
-
-        if exclude_columns is None:
-            exclude_columns = []
-
-        features = feature_lookups
-        del feature_lookups
-
-        features = format_feature_lookups_and_functions(self._spark_client, features)
-        # Create a TrainingSetClient instance
-        training_set_client = TrainingSetClient(self._spark_client)
-        return training_set_client.create_training_set_from_feature_lookups(
-            df=df,
-            feature_lookups=features,
-            label=label,
-            exclude_columns=exclude_columns,
-            **kwargs
-        )
-
-    def write_table(
-        self,
-        name: str,
-        df: DataFrame,
-        mode: str = APPEND,
-        checkpoint_location: Optional[str] = None,
-        trigger: Dict[str, Any] = DEFAULT_WRITE_STREAM_TRIGGER,
-    ) -> Optional[StreamingQuery]:
-
-        """
-        Write data to a feature table (supports batch and streaming).
-
-        Args:
-            name: feature table name
-            df: the DataFrame to write
-            mode: write mode (defaults to append)
-            checkpoint_location: checkpoint location for streaming (optional)
-            trigger: streaming trigger configuration (defaults to the system preset)
-
-        Returns:
-            A StreamingQuery for streaming writes, otherwise None.
-        """
-
-        return self._feature_table_client.write_table(
-            name=name,
-            df=df,
-            mode=mode,
-            checkpoint_location=checkpoint_location,
-            trigger=trigger,
-        )
```
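With the duplicate package gone, any 0.1.5 code that imported the top-level client must move to the `wedata` namespace. A hedged migration sketch, using the method names of the deleted 0.1.5 client shown above; note the surviving `wedata/feature_store/client.py` changed by +113/-41 in this release, so signatures should be verified against the new source:

```python
from pyspark.sql import SparkSession

# 0.1.5 (deleted duplicate):
#   from feature_store.client import FeatureStoreClient
# 0.1.6 (path per the file list at the top of this diff):
from wedata.feature_store.client import FeatureStoreClient

spark = SparkSession.builder.getOrCreate()
client = FeatureStoreClient(spark)

# "user_features" and its columns are hypothetical.
client.create_table(
    name="user_features",
    primary_keys="user_id",
    df=spark.createDataFrame([(1, 0.5)], ["user_id", "score"]),
)
features_df = client.read_table("user_features")
```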
feature_store/constants/__init__.py DELETED (empty file, no content diff)
feature_store/constants/constants.py DELETED

```diff
@@ -1,28 +0,0 @@
-
-OVERWRITE = "overwrite"
-APPEND = "append"
-PATH = "path"
-TABLE = "table"
-CUSTOM = "custom"
-PREDICTION_COLUMN_NAME = "prediction"
-MODEL_DATA_PATH_ROOT = "feature_store"
-UTF8_BYTES_PER_CHAR = 4
-MAX_PRIMARY_KEY_STRING_LENGTH_CHARS = 100
-MAX_PRIMARY_KEY_STRING_LENGTH_BYTES = (
-    MAX_PRIMARY_KEY_STRING_LENGTH_CHARS * UTF8_BYTES_PER_CHAR
-)
-STREAMING_TRIGGER_CONTINUOUS = "continuous"
-STREAMING_TRIGGER_ONCE = "once"
-STREAMING_TRIGGER_PROCESSING_TIME = "processingTime"
-DEFAULT_WRITE_STREAM_TRIGGER = {STREAMING_TRIGGER_PROCESSING_TIME: "5 seconds"}
-_DEFAULT_PUBLISH_STREAM_TRIGGER = {STREAMING_TRIGGER_PROCESSING_TIME: "5 minutes"}
-
-
-_WARN = "WARN"
-_ERROR = "ERROR"
-_SOURCE_FORMAT_DELTA = "delta"
-
-_NO_RESULT_TYPE_PASSED = "NO_RESULT_TYPE"
-_USE_SPARK_NATIVE_JOIN = "use_spark_native_join"
-_PREBUILT_ENV_URI = "prebuilt_env_uri"
-
```
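The trigger dictionaries above are keyword arguments for Spark's `DataStreamWriter.trigger`; `DEFAULT_WRITE_STREAM_TRIGGER` unpacks to `trigger(processingTime="5 seconds")`. A sketch of that wiring (these constants still ship in 0.1.6 under `wedata.feature_store.constants.constants`; the sink and checkpoint path here are hypothetical):

```python
from pyspark.sql import SparkSession

# Constants as defined in the deleted module above.
STREAMING_TRIGGER_PROCESSING_TIME = "processingTime"
DEFAULT_WRITE_STREAM_TRIGGER = {STREAMING_TRIGGER_PROCESSING_TIME: "5 seconds"}

spark = SparkSession.builder.getOrCreate()
stream_df = spark.readStream.format("rate").load()  # toy streaming source

query = (
    stream_df.writeStream
    .outputMode("append")                        # cf. APPEND above
    .option("checkpointLocation", "/tmp/ckpt/demo")
    .trigger(**DEFAULT_WRITE_STREAM_TRIGGER)     # -> trigger(processingTime="5 seconds")
    .format("console")
    .start()
)
```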
feature_store/entities/__init__.py DELETED (empty file, no content diff)
feature_store/entities/column_info.py DELETED

```diff
@@ -1,117 +0,0 @@
-import copy
-from typing import Optional, Union
-
-from feature_store.entities.feature_column_info import FeatureColumnInfo
-from feature_store.entities.feature_spec_constants import SOURCE_DATA_COLUMN_INFO, FEATURE_COLUMN_INFO, \
-    ON_DEMAND_COLUMN_INFO
-from feature_store.entities.on_demand_column_info import OnDemandColumnInfo
-from feature_store.entities.source_data_column_info import SourceDataColumnInfo
-
-
-class ColumnInfo:
-    """
-    ColumnInfo's structure and properties are mapped 1:1 to the ColumnInfo proto message, unless specified otherwise.
-    """
-
-    def __init__(
-        self,
-        info: Union[SourceDataColumnInfo, FeatureColumnInfo, OnDemandColumnInfo],
-        include: bool,
-        data_type: Optional[str] = None,
-        topological_ordering: Optional[int] = None,
-    ):
-        if not isinstance(
-            info, (SourceDataColumnInfo, FeatureColumnInfo, OnDemandColumnInfo)
-        ):
-            raise ValueError(
-                "info must be one of SourceDataColumnInfo, FeatureColumnInfo, OnDemandColumnInfo."
-            )
-        self._info = info
-        self._include = include
-        self._data_type = data_type
-        self._topological_ordering = topological_ordering
-
-    @property
-    def info(
-        self,
-    ) -> Union[SourceDataColumnInfo, FeatureColumnInfo, OnDemandColumnInfo]:
-        return self._info
-
-    @property
-    def include(self) -> bool:
-        return self._include
-
-    @property
-    def data_type(self) -> Optional[str]:
-        """
-        FeatureSpecs before v7 are not required to have data types.
-        """
-        return self._data_type
-
-    @property
-    def topological_ordering(self) -> Optional[int]:
-        """
-        FeatureSpecs before v8 are not required to have topological ordering.
-        """
-        return self._topological_ordering
-
-    @property
-    def output_name(self) -> str:
-        """
-        This field does not exist in the proto, and is provided for convenience.
-        """
-        return self.info.output_name
-
-    def with_topological_ordering(self, ordering: int):
-        new_column_info = copy.copy(self)
-        new_column_info._topological_ordering = ordering
-        return new_column_info
-
-    @classmethod
-    def from_proto(cls, column_info_proto):
-        if column_info_proto.HasField(SOURCE_DATA_COLUMN_INFO):
-            info = SourceDataColumnInfo.from_proto(
-                column_info_proto.source_data_column_info
-            )
-        elif column_info_proto.HasField(FEATURE_COLUMN_INFO):
-            info = FeatureColumnInfo.from_proto(column_info_proto.feature_column_info)
-        elif column_info_proto.HasField(ON_DEMAND_COLUMN_INFO):
-            info = OnDemandColumnInfo.from_proto(
-                column_info_proto.on_demand_column_info
-            )
-        else:
-            raise ValueError("Unsupported info type: " + str(column_info_proto))
-
-        data_type = (
-            column_info_proto.data_type
-            if column_info_proto.HasField("data_type")
-            else None
-        )
-        topological_ordering = (
-            column_info_proto.topological_ordering
-            if column_info_proto.HasField("topological_ordering")
-            else None
-        )
-        return ColumnInfo(
-            info=info,
-            include=column_info_proto.include,
-            data_type=data_type,
-            topological_ordering=topological_ordering,
-        )
-
-    # def to_proto(self):
-    #     column_info = ProtoColumnInfo(
-    #         include=self.include,
-    #         data_type=self.data_type,
-    #         topological_ordering=self.topological_ordering,
-    #     )
-    #     if isinstance(self.info, SourceDataColumnInfo):
-    #         column_info.source_data_column_info.CopyFrom(self.info.to_proto())
-    #     elif isinstance(self.info, FeatureColumnInfo):
-    #         column_info.feature_column_info.CopyFrom(self.info.to_proto())
-    #     elif isinstance(self.info, OnDemandColumnInfo):
-    #         column_info.on_demand_column_info.CopyFrom(self.info.to_proto())
-    #     else:
-    #         raise ValueError("Unsupported info type: " + str(self.info))
-    #
-    #     return column_info
```
feature_store/entities/data_type.py DELETED

```diff
@@ -1,92 +0,0 @@
-import json
-import re
-from typing import Any
-
-from pyspark.sql.types import ArrayType, DataType, DecimalType, MapType, StructType
-
-
-
-class DataType(_ProtoEnumEntity):
-    """Online store types."""
-
-    INTEGER = ProtoDataType.Value("INTEGER")
-    FLOAT = ProtoDataType.Value("FLOAT")
-    BOOLEAN = ProtoDataType.Value("BOOLEAN")
-    STRING = ProtoDataType.Value("STRING")
-    DOUBLE = ProtoDataType.Value("DOUBLE")
-    LONG = ProtoDataType.Value("LONG")
-    TIMESTAMP = ProtoDataType.Value("TIMESTAMP")
-    DATE = ProtoDataType.Value("DATE")
-    SHORT = ProtoDataType.Value("SHORT")
-    ARRAY = ProtoDataType.Value("ARRAY")
-    MAP = ProtoDataType.Value("MAP")
-    BINARY = ProtoDataType.Value("BINARY")
-    DECIMAL = ProtoDataType.Value("DECIMAL")
-    STRUCT = ProtoDataType.Value("STRUCT")
-
-    _FIXED_DECIMAL = re.compile("decimal\\(\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\)")
-
-    @classmethod
-    def _enum_type(cls) -> Any:
-        return ProtoDataType
-
-    @classmethod
-    def from_spark_type(cls, spark_type):
-        return cls.from_string(spark_type.typeName())
-
-    @classmethod
-    def spark_type_to_string(cls, spark_type):
-        return DataType.to_string(DataType.from_spark_type(spark_type))
-
-    @classmethod
-    def top_level_type_supported(cls, spark_type: DataType) -> bool:
-        """
-        Checks whether the provided Spark data type is supported by Feature Store, only considering
-        the top-level type for nested data types.
-
-        Details on nested types:
-          ArrayType: The elementType is not checked. Will return True.
-          MapType: The keyType and valueType are not checked. Will return True.
-          StructType: The struct fields are not checked. Will return True.
-        """
-        cls.init()
-        return spark_type.typeName().upper() in cls._STRING_TO_ENUM
-
-    @classmethod
-    def to_complex_spark_type(cls, json_value):
-        """
-        Constructs a complex Spark DataType from its compact JSON representation.
-
-        Examples:
-            - Input: '"decimal(1,2)"'
-              Output: DecimalType(1,2)
-            - Input: '{"containsNull":false,"elementType":"integer","type":"array"}'
-              Output: ArrayType(IntegerType,false)
-            - Input: '{"keyType":"integer","type":"map","valueContainsNull":true,"valueType":"integer"}'
-              Output: MapType(IntegerType,IntegerType,true)
-        """
-        if not json_value:
-            raise ValueError("Empty JSON value cannot be converted to Spark DataType")
-
-        json_data = json.loads(json_value)
-        if not isinstance(json_data, dict):
-            # DecimalType does not have fromJson() method
-            if json_value == "decimal":
-                return DecimalType()
-            if cls._FIXED_DECIMAL.match(json_data):
-                m = cls._FIXED_DECIMAL.match(json_data)
-                return DecimalType(int(m.group(1)), int(m.group(2)))
-
-        if json_data["type"].upper() == cls.to_string(cls.ARRAY):
-            return ArrayType.fromJson(json_data)
-
-        if json_data["type"].upper() == cls.to_string(cls.MAP):
-            return MapType.fromJson(json_data)
-
-        if json_data["type"].upper() == cls.to_string(cls.STRUCT):
-            return StructType.fromJson(json_data)
-
-        else:
-            raise ValueError(
-                f"Spark type {json_data['type']} cannot be converted to a complex Spark DataType"
-            )
```
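The compact JSON forms in the `to_complex_spark_type` docstring are the same representation PySpark's own `fromJson` constructors accept, which is what the deleted method delegated to for array/map/struct types:

```python
from pyspark.sql.types import ArrayType, MapType

# Mirrors the docstring examples above using PySpark's fromJson helpers.
arr = ArrayType.fromJson(
    {"type": "array", "elementType": "integer", "containsNull": False}
)
mp = MapType.fromJson(
    {"type": "map", "keyType": "integer", "valueType": "integer",
     "valueContainsNull": True}
)
print(arr)  # e.g. ArrayType(IntegerType(), False); repr varies by PySpark version
print(mp)   # e.g. MapType(IntegerType(), IntegerType(), True)
```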
feature_store/entities/environment_variables.py DELETED

```diff
@@ -1,55 +0,0 @@
-import os
-
-
-class _EnvironmentVariable:
-    """
-    Represents an environment variable for the feature store client for custom configurations as needed.
-    """
-
-    def __init__(self, name, type_, default):
-        self.name = name
-        self.type = type_
-        self.default = default
-
-    @property
-    def defined(self):
-        return self.name in os.environ
-
-    def get_raw(self):
-        return os.getenv(self.name)
-
-    def set(self, value):
-        os.environ[self.name] = str(value)
-
-    def unset(self):
-        os.environ.pop(self.name, None)
-
-    def get(self):
-        """
-        Reads the value of the environment variable if it exists and converts it to the desired
-        type. Otherwise, returns the default value.
-        """
-        if (val := self.get_raw()) is not None:
-            try:
-                return self.type(val)
-            except Exception as e:
-                raise ValueError(
-                    f"Failed to convert {val!r} to {self.type} for {self.name}: {e}"
-                )
-        return self.default
-
-    def __str__(self):
-        return f"{self.name} (default: {self.default}, type: {self.type.__name__})"
-
-    def __repr__(self):
-        return repr(self.name)
-
-    def __format__(self, format_spec: str) -> str:
-        return self.name.__format__(format_spec)
-
-
-# The threshold (in bytes) at which a broadcast join is used for the as-of join in point-in-time feature joins.
-# Default is 20MB, as benchmarks show diminishing returns past this value; Spark's default broadcast join threshold is 10MB.
-BROADCAST_JOIN_THRESHOLD = _EnvironmentVariable(
-    "BROADCAST_JOIN_THRESHOLD", int, 20 * 1024 * 1024
-)
```
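A minimal sketch of how `BROADCAST_JOIN_THRESHOLD` resolved its value (an environment variable override wins, otherwise the 20 MB default applies), using only the stdlib semantics of the class above:

```python
import os

# Override to 50 MB, as the deleted class's set() would do.
os.environ["BROADCAST_JOIN_THRESHOLD"] = str(50 * 1024 * 1024)

# Equivalent of _EnvironmentVariable.get(): env value converted via int(),
# falling back to the 20 MB default when the variable is unset.
raw = os.getenv("BROADCAST_JOIN_THRESHOLD")
threshold = int(raw) if raw is not None else 20 * 1024 * 1024
print(threshold)  # 52428800
```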
feature_store/entities/feature.py DELETED

```diff
@@ -1,53 +0,0 @@
-
-
-class Feature:
-    def __init__(
-        self,
-        feature_table,
-        feature_id,
-        name,
-        data_type,
-        description,
-        data_type_details=None,
-    ):
-        self._feature_table = feature_table
-        self._name = name
-        self._data_type = data_type
-        self._description = description
-        self._data_type_details = data_type_details
-        self._feature_id = feature_id
-
-    @property
-    def feature_table(self):
-        return self._feature_table
-
-    @property
-    def feature_id(self):
-        return self._feature_id
-
-    @property
-    def name(self):
-        return self._name
-
-    @property
-    def data_type(self):
-        return self._data_type
-
-    @property
-    def data_type_details(self):
-        return self._data_type_details
-
-    @property
-    def description(self):
-        return self._description
-
-    @classmethod
-    def from_proto(cls, feature_proto):
-        return cls(
-            feature_table=feature_proto.table,
-            feature_id=feature_proto.id,
-            name=feature_proto.name,
-            data_type=feature_proto.data_type,
-            data_type_details=feature_proto.data_type_details,
-            description=feature_proto.description,
-        )
```