tencent-wedata-feature-engineering-dev 0.1.50__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tencent-wedata-feature-engineering-dev might be problematic. Click here for more details.

Files changed (73)
  1. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/PKG-INFO +11 -3
  2. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/setup.py +2 -6
  3. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/tencent_wedata_feature_engineering_dev.egg-info/PKG-INFO +11 -3
  4. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/tencent_wedata_feature_engineering_dev.egg-info/SOURCES.txt +2 -21
  5. tencent_wedata_feature_engineering_dev-0.2.0/tencent_wedata_feature_engineering_dev.egg-info/requires.txt +1 -0
  6. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/client.py +28 -92
  7. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/constants/constants.py +2 -5
  8. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/entities/feature_lookup.py +0 -17
  9. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/entities/feature_spec.py +2 -2
  10. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/entities/feature_table.py +1 -5
  11. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/entities/function_info.py +4 -1
  12. tencent_wedata_feature_engineering_dev-0.2.0/wedata/feature_store/feature_table_client/feature_table_client.py +510 -0
  13. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/spark_client/spark_client.py +15 -41
  14. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/training_set_client/training_set_client.py +10 -9
  15. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/utils/common_utils.py +4 -48
  16. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/utils/feature_lookup_utils.py +43 -37
  17. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/utils/feature_spec_utils.py +1 -1
  18. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/utils/uc_utils.py +1 -1
  19. tencent-wedata-feature-engineering-dev-0.1.50/tencent_wedata_feature_engineering_dev.egg-info/requires.txt +0 -5
  20. tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/cloud_sdk_client/client.py +0 -108
  21. tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/cloud_sdk_client/models.py +0 -686
  22. tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/cloud_sdk_client/utils.py +0 -39
  23. tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/common/log/logger.py +0 -40
  24. tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/common/store_config/redis.py +0 -48
  25. tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/constants/engine_types.py +0 -34
  26. tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/feast_client/feast_client.py +0 -487
  27. tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/feature_table_client/__init__.py +0 -0
  28. tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/feature_table_client/feature_table_client.py +0 -985
  29. tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/spark_client/__init__.py +0 -0
  30. tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/training_set_client/__init__.py +0 -0
  31. tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/utils/__init__.py +0 -0
  32. tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/utils/env_utils.py +0 -108
  33. tencent-wedata-feature-engineering-dev-0.1.50/wedata/tempo/__init__.py +0 -0
  34. tencent-wedata-feature-engineering-dev-0.1.50/wedata/tempo/interpol.py +0 -448
  35. tencent-wedata-feature-engineering-dev-0.1.50/wedata/tempo/intervals.py +0 -1331
  36. tencent-wedata-feature-engineering-dev-0.1.50/wedata/tempo/io.py +0 -61
  37. tencent-wedata-feature-engineering-dev-0.1.50/wedata/tempo/ml.py +0 -129
  38. tencent-wedata-feature-engineering-dev-0.1.50/wedata/tempo/resample.py +0 -318
  39. tencent-wedata-feature-engineering-dev-0.1.50/wedata/tempo/tsdf.py +0 -1720
  40. tencent-wedata-feature-engineering-dev-0.1.50/wedata/tempo/utils.py +0 -254
  41. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/README.md +0 -0
  42. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/setup.cfg +0 -0
  43. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/tencent_wedata_feature_engineering_dev.egg-info/dependency_links.txt +0 -0
  44. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/tencent_wedata_feature_engineering_dev.egg-info/top_level.txt +0 -0
  45. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/__init__.py +0 -0
  46. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/__init__.py +0 -0
  47. {tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/cloud_sdk_client → tencent_wedata_feature_engineering_dev-0.2.0/wedata/feature_store/common}/__init__.py +0 -0
  48. {tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/common → tencent_wedata_feature_engineering_dev-0.2.0/wedata/feature_store/common/protos}/__init__.py +0 -0
  49. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/common/protos/feature_store_pb2.py +0 -0
  50. {tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/common/log → tencent_wedata_feature_engineering_dev-0.2.0/wedata/feature_store/constants}/__init__.py +0 -0
  51. {tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/common/protos → tencent_wedata_feature_engineering_dev-0.2.0/wedata/feature_store/entities}/__init__.py +0 -0
  52. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/entities/column_info.py +0 -0
  53. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/entities/environment_variables.py +0 -0
  54. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/entities/feature.py +0 -0
  55. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/entities/feature_column_info.py +0 -0
  56. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/entities/feature_function.py +0 -0
  57. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/entities/feature_spec_constants.py +0 -0
  58. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/entities/feature_table_info.py +0 -0
  59. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/entities/on_demand_column_info.py +0 -0
  60. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/entities/source_data_column_info.py +0 -0
  61. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/entities/training_set.py +0 -0
  62. {tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/common/store_config → tencent_wedata_feature_engineering_dev-0.2.0/wedata/feature_store/feature_table_client}/__init__.py +0 -0
  63. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/mlflow_model.py +0 -0
  64. {tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/constants → tencent_wedata_feature_engineering_dev-0.2.0/wedata/feature_store/spark_client}/__init__.py +0 -0
  65. {tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/entities → tencent_wedata_feature_engineering_dev-0.2.0/wedata/feature_store/training_set_client}/__init__.py +0 -0
  66. {tencent-wedata-feature-engineering-dev-0.1.50/wedata/feature_store/feast_client → tencent_wedata_feature_engineering_dev-0.2.0/wedata/feature_store/utils}/__init__.py +0 -0
  67. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/utils/feature_utils.py +0 -0
  68. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/utils/on_demand_utils.py +0 -0
  69. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/utils/schema_utils.py +0 -0
  70. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/utils/signature_utils.py +0 -0
  71. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/utils/topological_sort.py +0 -0
  72. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/utils/training_set_utils.py +0 -0
  73. {tencent-wedata-feature-engineering-dev-0.1.50 → tencent_wedata_feature_engineering_dev-0.2.0}/wedata/feature_store/utils/validation_utils.py +0 -0
@@ -1,7 +1,7 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: tencent-wedata-feature-engineering-dev
3
- Version: 0.1.50
4
- Summary: Wedata Feature Engineering Library Development
3
+ Version: 0.2.0
4
+ Summary: Wedata Feature Engineering Library
5
5
  Home-page:
6
6
  Author: meahqian
7
7
  Author-email:
@@ -11,3 +11,11 @@ Classifier: License :: OSI Approved :: Apache Software License
11
11
  Classifier: Operating System :: OS Independent
12
12
  Requires-Python: >=3.7
13
13
  Description-Content-Type: text/markdown
14
+ Requires-Dist: pandas>=1.0.0
15
+ Dynamic: author
16
+ Dynamic: classifier
17
+ Dynamic: description-content-type
18
+ Dynamic: license
19
+ Dynamic: requires-dist
20
+ Dynamic: requires-python
21
+ Dynamic: summary
@@ -11,16 +11,12 @@ setup(
11
11
  version=version["__version__"],
12
12
  packages=find_packages(include=['wedata', 'wedata.*']),
13
13
  install_requires=[
14
- 'pandas>=1.0.0',
15
- 'feast[redis]==0.49.0',
16
- 'grpcio==1.74.0',
17
- 'tencentcloud-sdk-python',
18
- 'ipython'
14
+ 'pandas>=1.0.0'
19
15
  ],
20
16
  python_requires='>=3.7',
21
17
  author="meahqian",
22
18
  author_email="",
23
- description="Wedata Feature Engineering Library Development",
19
+ description="Wedata Feature Engineering Library",
24
20
  long_description=open("README.md").read(),
25
21
  long_description_content_type="text/markdown",
26
22
  license="Apache 2.0",
@@ -1,7 +1,7 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: tencent-wedata-feature-engineering-dev
3
- Version: 0.1.50
4
- Summary: Wedata Feature Engineering Library Development
3
+ Version: 0.2.0
4
+ Summary: Wedata Feature Engineering Library
5
5
  Home-page:
6
6
  Author: meahqian
7
7
  Author-email:
@@ -11,3 +11,11 @@ Classifier: License :: OSI Approved :: Apache Software License
11
11
  Classifier: Operating System :: OS Independent
12
12
  Requires-Python: >=3.7
13
13
  Description-Content-Type: text/markdown
14
+ Requires-Dist: pandas>=1.0.0
15
+ Dynamic: author
16
+ Dynamic: classifier
17
+ Dynamic: description-content-type
18
+ Dynamic: license
19
+ Dynamic: requires-dist
20
+ Dynamic: requires-python
21
+ Dynamic: summary
@@ -1,4 +1,5 @@
1
1
  README.md
2
+ setup.cfg
2
3
  setup.py
3
4
  tencent_wedata_feature_engineering_dev.egg-info/PKG-INFO
4
5
  tencent_wedata_feature_engineering_dev.egg-info/SOURCES.txt
@@ -9,20 +10,11 @@ wedata/__init__.py
9
10
  wedata/feature_store/__init__.py
10
11
  wedata/feature_store/client.py
11
12
  wedata/feature_store/mlflow_model.py
12
- wedata/feature_store/cloud_sdk_client/__init__.py
13
- wedata/feature_store/cloud_sdk_client/client.py
14
- wedata/feature_store/cloud_sdk_client/models.py
15
- wedata/feature_store/cloud_sdk_client/utils.py
16
13
  wedata/feature_store/common/__init__.py
17
- wedata/feature_store/common/log/__init__.py
18
- wedata/feature_store/common/log/logger.py
19
14
  wedata/feature_store/common/protos/__init__.py
20
15
  wedata/feature_store/common/protos/feature_store_pb2.py
21
- wedata/feature_store/common/store_config/__init__.py
22
- wedata/feature_store/common/store_config/redis.py
23
16
  wedata/feature_store/constants/__init__.py
24
17
  wedata/feature_store/constants/constants.py
25
- wedata/feature_store/constants/engine_types.py
26
18
  wedata/feature_store/entities/__init__.py
27
19
  wedata/feature_store/entities/column_info.py
28
20
  wedata/feature_store/entities/environment_variables.py
@@ -38,8 +30,6 @@ wedata/feature_store/entities/function_info.py
38
30
  wedata/feature_store/entities/on_demand_column_info.py
39
31
  wedata/feature_store/entities/source_data_column_info.py
40
32
  wedata/feature_store/entities/training_set.py
41
- wedata/feature_store/feast_client/__init__.py
42
- wedata/feature_store/feast_client/feast_client.py
43
33
  wedata/feature_store/feature_table_client/__init__.py
44
34
  wedata/feature_store/feature_table_client/feature_table_client.py
45
35
  wedata/feature_store/spark_client/__init__.py
@@ -48,7 +38,6 @@ wedata/feature_store/training_set_client/__init__.py
48
38
  wedata/feature_store/training_set_client/training_set_client.py
49
39
  wedata/feature_store/utils/__init__.py
50
40
  wedata/feature_store/utils/common_utils.py
51
- wedata/feature_store/utils/env_utils.py
52
41
  wedata/feature_store/utils/feature_lookup_utils.py
53
42
  wedata/feature_store/utils/feature_spec_utils.py
54
43
  wedata/feature_store/utils/feature_utils.py
@@ -58,12 +47,4 @@ wedata/feature_store/utils/signature_utils.py
58
47
  wedata/feature_store/utils/topological_sort.py
59
48
  wedata/feature_store/utils/training_set_utils.py
60
49
  wedata/feature_store/utils/uc_utils.py
61
- wedata/feature_store/utils/validation_utils.py
62
- wedata/tempo/__init__.py
63
- wedata/tempo/interpol.py
64
- wedata/tempo/intervals.py
65
- wedata/tempo/io.py
66
- wedata/tempo/ml.py
67
- wedata/tempo/resample.py
68
- wedata/tempo/tsdf.py
69
- wedata/tempo/utils.py
50
+ wedata/feature_store/utils/validation_utils.py
@@ -4,6 +4,7 @@ Wedata FeatureStoreClient Python实现
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
+ import os
7
8
  from types import ModuleType
8
9
  from typing import Union, List, Dict, Optional, Any
9
10
  from pyspark.sql import DataFrame, SparkSession
@@ -13,9 +14,6 @@ import mlflow
13
14
  from wedata.feature_store.constants.constants import FEATURE_STORE_CLIENT
14
15
 
15
16
  from wedata.feature_store.constants.constants import APPEND, DEFAULT_WRITE_STREAM_TRIGGER
16
- from wedata.feature_store.constants.engine_types import EngineTypes
17
- from wedata.feature_store.common.store_config.redis import RedisStoreConfig
18
- from wedata.feature_store.cloud_sdk_client.models import TaskSchedulerConfiguration
19
17
  from wedata.feature_store.entities.feature_function import FeatureFunction
20
18
  from wedata.feature_store.entities.feature_lookup import FeatureLookup
21
19
  from wedata.feature_store.entities.feature_table import FeatureTable
@@ -30,7 +28,7 @@ from wedata.feature_store.utils.feature_utils import format_feature_lookups_and_
30
28
  class FeatureStoreClient:
31
29
  """特征存储统一客户端,提供特征全生命周期管理能力"""
32
30
 
33
- def __init__(self, spark: Optional[SparkSession] = None, cloud_secret_id: str = None, cloud_secret_key: str = None):
31
+ def __init__(self, spark: Optional[SparkSession] = None):
34
32
  """
35
33
  :param spark: 已初始化的SparkSession对象
36
34
  """
@@ -38,20 +36,19 @@ class FeatureStoreClient:
38
36
  spark = SparkSession.builder.getOrCreate()
39
37
  self._spark = spark
40
38
  self._spark_client = SparkClient(spark)
41
- self._feature_table_client = FeatureTableClient(spark, cloud_secret_id=cloud_secret_id,
42
- cloud_secret_key=cloud_secret_key)
39
+ self._feature_table_client = FeatureTableClient(spark)
43
40
  self._training_set_client = TrainingSetClient(self._spark_client)
41
+ self._init_catalog()
44
42
 
45
43
  def create_table(
46
44
  self,
47
45
  name: str,
48
46
  primary_keys: Union[str, List[str]],
49
- timestamp_key: [str],
50
- engine_type: [EngineTypes],
51
- data_source_name: [str],
52
47
  database_name: Optional[str] = None,
48
+ location: Optional[str] = None,
53
49
  df: Optional[DataFrame] = None,
54
50
  *,
51
+ timestamp_keys: Union[str, List[str], None] = None,
55
52
  partition_columns: Union[str, List[str], None] = None,
56
53
  schema: Optional[StructType] = None,
57
54
  description: Optional[str] = None,
@@ -63,11 +60,10 @@ class FeatureStoreClient:
63
60
  Args:
64
61
  name: 特征表全称(格式:<table>)
65
62
  primary_keys: 主键列名(支持复合主键)
66
- timestamp_key: 时间戳键(用于时态特征)
67
- engine_type: 引擎类型 wedata.feature_store.constants.engine_types.EngineTypes
68
- data_source_name: 数据源名称
69
63
  database_name: 数据库名
64
+ location: cos存储位置
70
65
  df: 初始数据(可选,用于推断schema)
66
+ timestamp_keys: 时间戳键(用于时态特征)
71
67
  partition_columns: 分区列(优化存储查询)
72
68
  schema: 表结构定义(可选,当不提供df时必需)
73
69
  description: 业务描述
@@ -83,56 +79,43 @@ class FeatureStoreClient:
83
79
  return self._feature_table_client.create_table(
84
80
  name=name,
85
81
  primary_keys=primary_keys,
86
- engine_type=engine_type,
87
82
  database_name=database_name,
88
- data_source_name=data_source_name,
83
+ location=location,
89
84
  df=df,
90
- timestamp_key=timestamp_key,
85
+ timestamp_keys=timestamp_keys,
91
86
  partition_columns=partition_columns,
92
87
  schema=schema,
93
88
  description=description,
94
89
  tags=tags
95
90
  )
96
91
 
97
- def register_table(self, name: str, timestamp_key: str, engine_type: EngineTypes, data_source_name: [str],
98
- database_name: Optional[str] = None,
99
- primary_keys: Union[str, List[str]] = None) -> DataFrame:
92
+ def register_table(self, name: str, database_name: Optional[str] = None) -> DataFrame:
100
93
  """
101
- 将普通的表注册为特征表,并返回特征表数据
94
+ 读取特征表数据
102
95
 
103
96
  Args:
104
97
  name: 特征表名称
105
98
  database_name: 特征库名称
106
- timestamp_key: 时间戳键 (用于后续离在线特征同步)
107
- engine_type: 引擎类型 wedata.feature_store.constants.engine_types.EngineTypes
108
- data_source_name: 数据源名称
109
- primary_keys: 主键列名(支持复合主键)(仅当engine_type为EngineTypes.HIVE_ENGINE时有效)
99
+
110
100
  Returns:
111
101
  DataFrame: 包含特征表数据的DataFrame对象
112
102
  """
113
103
 
114
- return self._feature_table_client.register_table(name, database_name, timestamp_key=timestamp_key,
115
- engine_type=engine_type, primary_keys=primary_keys, data_source_name=data_source_name)
104
+ return self._feature_table_client.register_table(name, database_name)
116
105
 
117
- def read_table(self, name: str, database_name: Optional[str] = None, is_online: bool = False,
118
- online_config: Optional[RedisStoreConfig] = None,
119
- entity_row: Optional[List[Dict[str, Any]]] = None) -> DataFrame:
106
+ def read_table(self, name: str, database_name: Optional[str] = None) -> DataFrame:
120
107
  """
121
108
  读取特征表数据
122
109
 
123
110
  Args:
124
111
  name: 特征表名称
125
112
  database_name: 特征库名称
126
- is_online: 是否读取在线特征表(默认不读取)
127
- online_config: 在线特征表配置(仅当is_online为True时有效)
128
- entity_row: 实体行数据(仅当is_online为True时有效)
129
- [{primary_key1: [value1, value2]}, {primary_key2: [value1, value2]}]
113
+
130
114
  Returns:
131
115
  DataFrame: 包含特征表数据的DataFrame对象
132
116
  """
133
117
 
134
- return self._feature_table_client.read_table(name=name, database_name=database_name, is_online=is_online,
135
- online_config=online_config, entity_row=entity_row)
118
+ return self._feature_table_client.read_table(name, database_name)
136
119
 
137
120
  def get_table(self, name: str, database_name: Optional[str] = None) -> FeatureTable:
138
121
  """
@@ -154,6 +137,7 @@ class FeatureStoreClient:
154
137
  Args:
155
138
  name: 要删除的特征表名称
156
139
  database_name: database name
140
+
157
141
  Returns:
158
142
  None
159
143
  """
@@ -288,7 +272,7 @@ class FeatureStoreClient:
288
272
  )
289
273
 
290
274
  def score_batch(
291
- self, model_uri: str, df: DataFrame, result_type: str = "double", timestamp_key: str = None
275
+ self, model_uri: str, df: DataFrame, result_type: str = "double"
292
276
  ) -> DataFrame:
293
277
  """
294
278
  Evaluate the model on the provided :class:`DataFrame <pyspark.sql.DataFrame>`.
@@ -399,65 +383,17 @@ class FeatureStoreClient:
399
383
  df=df,
400
384
  result_type=result_type,
401
385
  client_name=FEATURE_STORE_CLIENT,
402
- timestamp_key=timestamp_key,
403
386
  )
404
387
 
405
- def publish_table(self, table_name: str, data_source_name: str, cloud_secret_id: str, cloud_secret_key: str,
406
- database_name: Optional[str] = None,
407
- is_cycle: bool = False, cycle_obj: TaskSchedulerConfiguration = None,
408
- is_use_default_online: bool = True, online_config: RedisStoreConfig = None):
409
- """
410
- Publish an offline feature table to an online feature table.
411
-
412
- This method synchronizes the offline feature table data to online storage
413
- for low-latency feature serving in real-time applications.
414
-
415
- Args:
416
- table_name: Name of the offline feature table
417
- data_source_name: Name of the data source
418
- cloud_secret_id: Cloud secret ID for authentication
419
- cloud_secret_key: Cloud secret key for authentication
420
- database_name: Database name (optional)
421
- is_cycle: Whether to enable periodic publishing (default: False)
422
- cycle_obj: Periodic task configuration object (required if is_cycle is True)
423
- is_use_default_online: Whether to use default online storage configuration (default: True)
424
- online_config: Custom online storage configuration (only effective when is_use_default_online is False)
425
-
426
- Returns:
427
- None
428
-
429
- """
430
- return self._feature_table_client.publish_table(table_name=table_name, database_name=database_name,
431
- data_source_name=data_source_name,
432
- cloud_secret_key=cloud_secret_key,
433
- cloud_secret_id=cloud_secret_id,
434
- is_cycle=is_cycle, cycle_obj=cycle_obj,
435
- is_use_default_online=is_use_default_online,
436
- online_config=online_config)
437
-
438
- def drop_online_table(self, table_name: str, online_config: RedisStoreConfig, database_name: Optional[str] = None):
439
- """
440
- Drop an online feature table.
441
- :param table_name: Name of the offline feature table
442
- :param database_name: Database name (optional)
443
- :param online_config: Custom online storage configuration (only effective when is_use_default_online is False)
444
- :return:
445
- """
446
- self._feature_table_client.drop_online_table(table_name=table_name, database_name=database_name, online_config=online_config)
447
-
448
- def create_feature_spec(
449
- self, name: str,
450
- features: List[Union[FeatureLookup, FeatureFunction]],
451
- exclude_columns: List[str]):
452
-
453
- """
454
- 创建特征配置文件
455
- :arg name: 特征配置文件名称
456
- :arg features: 特征列表,可以是FeatureLookup(特征查找)或FeatureFunction(特征函数)
457
- :arg exclude_columns: 需要从最终特征集中排除的列名列表
458
- """
459
- return self._training_set_client.create_feature_spec(name, features, self._spark_client, exclude_columns)
460
-
461
388
  @property
462
389
  def spark(self):
463
390
  return self._spark
391
+
392
+ def _init_catalog(self):
393
+ """关联catalog"""
394
+ qcloud_region = os.getenv("QCLOUD_REGION") or os.getenv("REGION") or os.getenv("KERNEL_COS_REGION") \
395
+ or os.getenv("NOTEBOOK_COS_REGION")
396
+ if qcloud_region:
397
+ mlflow.set_registry_uri(f"tclake:{qcloud_region}")
398
+
399
+
@@ -43,7 +43,7 @@ ML_MODEL = "MLmodel"
43
43
  FEATURE_LOOKUP_CLIENT_PIP_PACKAGE = "tencent-wedata-feature-engineering-dev"
44
44
 
45
45
  # 特征查找版本号
46
- FEATURE_LOOKUP_CLIENT_MAJOR_VERSION = "0.1.50"
46
+ FEATURE_LOOKUP_CLIENT_MAJOR_VERSION = "0.2.0"
47
47
 
48
48
  # 特征存储内部数据目录
49
49
  FEATURE_STORE_INTERNAL_DATA_DIR = "_wedata_internal/"
@@ -53,7 +53,4 @@ WEDATA_DEFAULT_FEATURE_STORE_DATABASE = "WEDATA_DEFAULT_FEATURE_STORE_DATABASE"
53
53
  FEATURE_TABLE_KEY = "wedata.feature_table"
54
54
  FEATURE_TABLE_VALUE = "true"
55
55
 
56
- FEATURE_TABLE_PROJECT = "wedata.feature_project_id"
57
- FEATURE_TABLE_TIMESTAMP = "timestampKeys"
58
- FEATURE_TABLE_BACKUP_PRIMARY_KEY = "primaryKeys" # 备用标识,主键
59
- FEATURE_DLC_TABLE_PRIMARY_KEY = "dlc.ao.data.govern.sorted.keys"
56
+ FEATURE_TABLE_PROJECT = "wedata.feature_project_id"
@@ -4,7 +4,6 @@ import logging
4
4
  from typing import Dict, List, Optional, Union
5
5
 
6
6
  from wedata.feature_store.utils import common_utils
7
- from wedata.feature_store.common.store_config.redis import RedisStoreConfig
8
7
 
9
8
  _logger = logging.getLogger(__name__)
10
9
 
@@ -20,8 +19,6 @@ class FeatureLookup:
20
19
 
21
20
  - table_name:特征表的名称。
22
21
  - lookup_key:用于在特征表和训练集之间进行联接的键。lookup_key必须是训练集中的列。lookup_key的类型和顺序必须与特征表的主键匹配。
23
- - is_online:如果为True,则会使用在线特征表。如果为False,则会使用离线特征表。默认值为False。
24
- - online_config:如果is_online为True,则会使用此配置来配置在线特征表。默认值为None。
25
22
  - feature_names:要从特征表中查找的特征的名称。如果您的模型需要主键作为特征,则可以将它们声明为独立的FeatureLookups。
26
23
  - rename_outputs:如果提供,则会将特征重命名为 :meth:`create_training_set() <databricks.feature_engineering.client.FeatureEngineeringClient.create_training_set>`返回的 :class:`TrainingSet <databricks.ml_features.training_set.TrainingSet>` 中的特征。
27
24
  - timestamp_lookup_key:用于在特征表和训练集之间进行联接的时间戳键。timestamp_lookup_key必须是训练集中的列。timestamp_lookup_key的类型必须与特征表的时间戳键的类型匹配。
@@ -49,8 +46,6 @@ class FeatureLookup:
49
46
  table_name: str,
50
47
  lookup_key: Union[str, List[str]],
51
48
  *,
52
- is_online: bool = False,
53
- online_config: RedisStoreConfig = None,
54
49
  feature_names: Union[str, List[str], None] = None,
55
50
  rename_outputs: Optional[Dict[str, str]] = None,
56
51
  timestamp_lookup_key: Optional[str] = None,
@@ -109,8 +104,6 @@ class FeatureLookup:
109
104
  self._lookup_key = copy.copy(lookup_key)
110
105
  self._timestamp_lookup_key = copy.copy(timestamp_lookup_key)
111
106
  self._lookback_window = copy.copy(lookback_window)
112
- self._is_online = is_online
113
- self._online_config = online_config
114
107
 
115
108
  self._rename_outputs = {}
116
109
  if rename_outputs is not None:
@@ -156,16 +149,6 @@ class FeatureLookup:
156
149
  """A lookback window applied only for point-in-time lookups."""
157
150
  return self._lookback_window
158
151
 
159
- @property
160
- def is_online(self):
161
- """Whether to use online feature tables."""
162
- return self._is_online
163
-
164
- @property
165
- def online_config(self):
166
- """The online feature table configuration."""
167
- return self._online_config
168
-
169
152
  def _get_feature_names(self):
170
153
  return self._feature_names
171
154
 
@@ -138,14 +138,14 @@ class FeatureSpec:
138
138
 
139
139
  # function_infos should not be duplicated
140
140
  common_utils.validate_strings_unique(
141
- [function_info.full_name for function_info in self.function_infos],
141
+ [function_info.udf_name for function_info in self.function_infos],
142
142
  "Internal Error: Expect all udf_names in function_infos to be unique. Found duplicates {}",
143
143
  )
144
144
 
145
145
  # Unique UDF names in function_infos must match those in column_infos.
146
146
  # No version check is required as both fields were added simultaneously in FeatureSpec v5.
147
147
  unique_udf_names = set(
148
- [function_info.full_name for function_info in self.function_infos]
148
+ [function_info.udf_name for function_info in self.function_infos]
149
149
  )
150
150
  unique_column_udf_names = set(
151
151
  [odci.udf_name for odci in self.on_demand_column_infos]
@@ -75,15 +75,11 @@ class FeatureTable:
75
75
  格式化的字符串,包含表名、ID、描述、主键、分区列、特征数量、
76
76
  时间戳键、创建时间、数据源数量和标签数量等信息
77
77
  """
78
- if self.description and len(self.description) > 50:
79
- desc = self.description[:50] + "..."
80
- else:
81
- desc = self.description
82
78
  return (
83
79
  f"FeatureTable(\n"
84
80
  f" name='{self.name}',\n"
85
81
  f" table_id='{self.table_id}',\n"
86
- f" description='{desc}',\n"
82
+ f" description='{self.description[:50]}{'...' if len(self.description) > 50 else ''}',\n"
87
83
  f" primary_keys={self.primary_keys},\n"
88
84
  f" partition_columns={self.partition_columns},\n"
89
85
  f" features={len(self.features)},\n"
@@ -1,6 +1,9 @@
1
-
1
+ from collections import defaultdict
2
2
  from typing import List, Optional
3
3
 
4
+ from pyspark.sql import Column, DataFrame
5
+ from pyspark.sql.functions import isnull, when
6
+ from pyspark.sql.types import StringType, StructField, StructType
4
7
  from wedata.feature_store.common.protos import feature_store_pb2
5
8
 
6
9
  class FunctionParameterInfo():