tencent-wedata-feature-engineering-dev 0.1.47__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tencent-wedata-feature-engineering-dev might be problematic. Click here for more details.
- {tencent_wedata_feature_engineering_dev-0.1.47.dist-info → tencent_wedata_feature_engineering_dev-0.2.2.dist-info}/METADATA +9 -3
- {tencent_wedata_feature_engineering_dev-0.1.47.dist-info → tencent_wedata_feature_engineering_dev-0.2.2.dist-info}/RECORD +15 -13
- {tencent_wedata_feature_engineering_dev-0.1.47.dist-info → tencent_wedata_feature_engineering_dev-0.2.2.dist-info}/WHEEL +1 -1
- wedata/feature_store/client.py +3 -6
- wedata/feature_store/cloud_sdk_client/client.py +55 -11
- wedata/feature_store/cloud_sdk_client/models.py +212 -37
- wedata/feature_store/cloud_sdk_client/utils.py +14 -0
- wedata/feature_store/common/log/__init__.py +0 -0
- wedata/feature_store/common/log/logger.py +44 -0
- wedata/feature_store/constants/constants.py +1 -1
- wedata/feature_store/feature_table_client/feature_table_client.py +54 -39
- wedata/feature_store/training_set_client/training_set_client.py +2 -8
- wedata/feature_store/utils/common_utils.py +5 -7
- wedata/feature_store/utils/env_utils.py +12 -0
- {tencent_wedata_feature_engineering_dev-0.1.47.dist-info → tencent_wedata_feature_engineering_dev-0.2.2.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: tencent-wedata-feature-engineering-dev
|
|
3
|
-
Version: 0.1.47
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Wedata Feature Engineering Library Development
|
|
5
5
|
Home-page:
|
|
6
6
|
Author: meahqian
|
|
@@ -16,4 +16,10 @@ Requires-Dist: feast[redis]==0.49.0
|
|
|
16
16
|
Requires-Dist: grpcio==1.74.0
|
|
17
17
|
Requires-Dist: tencentcloud-sdk-python
|
|
18
18
|
Requires-Dist: ipython
|
|
19
|
-
|
|
19
|
+
Dynamic: author
|
|
20
|
+
Dynamic: classifier
|
|
21
|
+
Dynamic: description-content-type
|
|
22
|
+
Dynamic: license
|
|
23
|
+
Dynamic: requires-dist
|
|
24
|
+
Dynamic: requires-python
|
|
25
|
+
Dynamic: summary
|
|
@@ -1,18 +1,20 @@
|
|
|
1
1
|
wedata/__init__.py,sha256=GYxqkkgH0oH4QtNiOCZHuGkc0sSH1LgEqmhSX6sB4So,200
|
|
2
2
|
wedata/feature_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
-
wedata/feature_store/client.py,sha256=
|
|
3
|
+
wedata/feature_store/client.py,sha256=7eBWrhjsr5mxyCRnG0rI6eTMocFDUDlfOr3MwW9GBa0,19197
|
|
4
4
|
wedata/feature_store/mlflow_model.py,sha256=OCUuccOoO0NXWSzIPoGeL03Ha1Q3aQTJW2RlJrTCmzc,554
|
|
5
5
|
wedata/feature_store/cloud_sdk_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
wedata/feature_store/cloud_sdk_client/client.py,sha256=
|
|
7
|
-
wedata/feature_store/cloud_sdk_client/models.py,sha256=
|
|
8
|
-
wedata/feature_store/cloud_sdk_client/utils.py,sha256=
|
|
6
|
+
wedata/feature_store/cloud_sdk_client/client.py,sha256=I4l3B_4DoikCQruUXL1aXgBgDFZ9pt-aCcS2UXK5qLQ,6290
|
|
7
|
+
wedata/feature_store/cloud_sdk_client/models.py,sha256=aQ8mqPBbzfaX2AzGTBx3-QjDRqqiw9KMmdSpIdY4f4E,24856
|
|
8
|
+
wedata/feature_store/cloud_sdk_client/utils.py,sha256=vffI9rAGkW12dX7IN-QUmIIG865_WY75GtKJgVF6aho,1098
|
|
9
9
|
wedata/feature_store/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
wedata/feature_store/common/log/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
+
wedata/feature_store/common/log/logger.py,sha256=jwTecUnTrR6LL5ocCzTuuPLqW2swjEf4HZSFHTEY5Fc,1287
|
|
10
12
|
wedata/feature_store/common/protos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
13
|
wedata/feature_store/common/protos/feature_store_pb2.py,sha256=oMIUGGeGNP84g_nFqOQwTXjV1GiU2ehSOy7CyFu2__g,4207
|
|
12
14
|
wedata/feature_store/common/store_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
15
|
wedata/feature_store/common/store_config/redis.py,sha256=9R5npM2s1u0o9IakmpbRsFdJC0vNar_uvA62OLWuXBs,1145
|
|
14
16
|
wedata/feature_store/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
-
wedata/feature_store/constants/constants.py,sha256=
|
|
17
|
+
wedata/feature_store/constants/constants.py,sha256=FXKE8BHnp04SDnE2XNrtXHk7U6dpgo8NTvtbaSCiC64,1958
|
|
16
18
|
wedata/feature_store/constants/engine_types.py,sha256=42mI-kNDDtoA4_I3iqDe4FkF2M2l_Bt4Q1V6WUB-_k0,921
|
|
17
19
|
wedata/feature_store/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
20
|
wedata/feature_store/entities/column_info.py,sha256=-AR6EKHwgoqIkRHFyguxVEtnYt6fvusWHkEjF4kvS0A,5141
|
|
@@ -32,14 +34,14 @@ wedata/feature_store/entities/training_set.py,sha256=ylt1h6Z_xU8hKYvnvd80CeewTGS
|
|
|
32
34
|
wedata/feature_store/feast_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
35
|
wedata/feature_store/feast_client/feast_client.py,sha256=mCv-OiKehfgcOJhJV0wXMRs5d7e2zEBYmVmDguk0rxU,20728
|
|
34
36
|
wedata/feature_store/feature_table_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
-
wedata/feature_store/feature_table_client/feature_table_client.py,sha256=
|
|
37
|
+
wedata/feature_store/feature_table_client/feature_table_client.py,sha256=a9jI_SkAtzdVHs763YvC1wClAB9SlF0wVZazjJIDqks,43699
|
|
36
38
|
wedata/feature_store/spark_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
39
|
wedata/feature_store/spark_client/spark_client.py,sha256=aTenEqfZoJYMrph98qjNHZ-H4dgNKnMaH14st8bCVRQ,11797
|
|
38
40
|
wedata/feature_store/training_set_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
|
-
wedata/feature_store/training_set_client/training_set_client.py,sha256=
|
|
41
|
+
wedata/feature_store/training_set_client/training_set_client.py,sha256=aFx2BLGoKg8OlawS39DQlyyGXqGrS2h62_NsAClD4tU,23186
|
|
40
42
|
wedata/feature_store/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
41
|
-
wedata/feature_store/utils/common_utils.py,sha256=
|
|
42
|
-
wedata/feature_store/utils/env_utils.py,sha256=
|
|
43
|
+
wedata/feature_store/utils/common_utils.py,sha256=FbHL0bQ14LpR5NRQc7NhPLFCoBuiCfaKEAW5_BdUWzU,12313
|
|
44
|
+
wedata/feature_store/utils/env_utils.py,sha256=Zj6569qDZR5fhfpKimOzwasaVT04q2c3BIUy5Uctv2g,3160
|
|
43
45
|
wedata/feature_store/utils/feature_lookup_utils.py,sha256=mNV6RhBdpv1iTZduCA9YwXwkeJCwU5MFQ1MkFeD9IhY,22003
|
|
44
46
|
wedata/feature_store/utils/feature_spec_utils.py,sha256=j8t-zel2_r8Q9m88BmFKkHMdkGNIduWJB-28OZDASRY,11613
|
|
45
47
|
wedata/feature_store/utils/feature_utils.py,sha256=KKq28bVB_lCuhnR9Hk6JegJBOVgcelWlvrRM-F9onkA,2796
|
|
@@ -58,7 +60,7 @@ wedata/tempo/ml.py,sha256=WtGa2szn6PditvZsTZoxo7wFDe4k1SRoMZ-jgNGIjvE,4323
|
|
|
58
60
|
wedata/tempo/resample.py,sha256=h81RVVmCl4ect-YKE-KZZHPDi1rGI3sh-YIb-Btz0ck,9698
|
|
59
61
|
wedata/tempo/tsdf.py,sha256=S4lZfxhSRFiezYoYS6gvGsl1mZA3zp-MWEKFHYZpDg0,70968
|
|
60
62
|
wedata/tempo/utils.py,sha256=I9I6l2DMwUoY213L04Yc1UR_zTWgSkj1BVo4ZwzQd4Y,7977
|
|
61
|
-
tencent_wedata_feature_engineering_dev-0.
|
|
62
|
-
tencent_wedata_feature_engineering_dev-0.
|
|
63
|
-
tencent_wedata_feature_engineering_dev-0.
|
|
64
|
-
tencent_wedata_feature_engineering_dev-0.
|
|
63
|
+
tencent_wedata_feature_engineering_dev-0.2.2.dist-info/METADATA,sha256=rvc2LgvO0vPYJQLogsjfx3gDN43xFlSm4UdZSHk_Mj8,732
|
|
64
|
+
tencent_wedata_feature_engineering_dev-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
65
|
+
tencent_wedata_feature_engineering_dev-0.2.2.dist-info/top_level.txt,sha256=Xa0v1rh__RvfVTVDirW5r5UBKg7ZO_iuTeXfp8MNo2A,7
|
|
66
|
+
tencent_wedata_feature_engineering_dev-0.2.2.dist-info/RECORD,,
|
wedata/feature_store/client.py
CHANGED
|
@@ -288,7 +288,7 @@ class FeatureStoreClient:
|
|
|
288
288
|
)
|
|
289
289
|
|
|
290
290
|
def score_batch(
|
|
291
|
-
self, model_uri: str, df: DataFrame, result_type: str = "double"
|
|
291
|
+
self, model_uri: str, df: DataFrame, result_type: str = "double", timestamp_key: str = None
|
|
292
292
|
) -> DataFrame:
|
|
293
293
|
"""
|
|
294
294
|
Evaluate the model on the provided :class:`DataFrame <pyspark.sql.DataFrame>`.
|
|
@@ -399,9 +399,10 @@ class FeatureStoreClient:
|
|
|
399
399
|
df=df,
|
|
400
400
|
result_type=result_type,
|
|
401
401
|
client_name=FEATURE_STORE_CLIENT,
|
|
402
|
+
timestamp_key=timestamp_key,
|
|
402
403
|
)
|
|
403
404
|
|
|
404
|
-
def publish_table(self, table_name: str, data_source_name: str,
|
|
405
|
+
def publish_table(self, table_name: str, data_source_name: str,
|
|
405
406
|
database_name: Optional[str] = None,
|
|
406
407
|
is_cycle: bool = False, cycle_obj: TaskSchedulerConfiguration = None,
|
|
407
408
|
is_use_default_online: bool = True, online_config: RedisStoreConfig = None):
|
|
@@ -414,8 +415,6 @@ class FeatureStoreClient:
|
|
|
414
415
|
Args:
|
|
415
416
|
table_name: Name of the offline feature table
|
|
416
417
|
data_source_name: Name of the data source
|
|
417
|
-
cloud_secret_id: Cloud secret ID for authentication
|
|
418
|
-
cloud_secret_key: Cloud secret key for authentication
|
|
419
418
|
database_name: Database name (optional)
|
|
420
419
|
is_cycle: Whether to enable periodic publishing (default: False)
|
|
421
420
|
cycle_obj: Periodic task configuration object (required if is_cycle is True)
|
|
@@ -428,8 +427,6 @@ class FeatureStoreClient:
|
|
|
428
427
|
"""
|
|
429
428
|
return self._feature_table_client.publish_table(table_name=table_name, database_name=database_name,
|
|
430
429
|
data_source_name=data_source_name,
|
|
431
|
-
cloud_secret_key=cloud_secret_key,
|
|
432
|
-
cloud_secret_id=cloud_secret_id,
|
|
433
430
|
is_cycle=is_cycle, cycle_obj=cycle_obj,
|
|
434
431
|
is_use_default_online=is_use_default_online,
|
|
435
432
|
online_config=online_config)
|
|
@@ -4,8 +4,9 @@ from tencentcloud.wedata.v20210820.wedata_client import WedataClient
|
|
|
4
4
|
from tencentcloud.wedata.v20250806.wedata_client import WedataClient as WedataClientV2
|
|
5
5
|
from tencentcloud.common import credential
|
|
6
6
|
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
|
|
7
|
-
from wedata.feature_store.cloud_sdk_client.utils import get_client_profile, set_request_header
|
|
7
|
+
from wedata.feature_store.cloud_sdk_client.utils import get_client_profile, set_request_header, is_mock
|
|
8
8
|
import wedata.feature_store.cloud_sdk_client.models as models
|
|
9
|
+
from wedata.feature_store.common.log.logger import get_logger
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
class FeatureCloudSDK:
|
|
@@ -22,17 +23,21 @@ class FeatureCloudSDK:
|
|
|
22
23
|
Returns:
|
|
23
24
|
创建结果响应
|
|
24
25
|
"""
|
|
26
|
+
logger = get_logger()
|
|
27
|
+
if is_mock():
|
|
28
|
+
logger.debug("Mock CreateOnlineFeatureTable API")
|
|
29
|
+
return models.CreateOnlineFeatureTableResponse()
|
|
25
30
|
try:
|
|
26
31
|
params = request._serialize()
|
|
27
32
|
headers = set_request_header(request.headers)
|
|
28
|
-
|
|
29
|
-
|
|
33
|
+
logger.debug(f"CreateOnlineFeatureTable params: {params}")
|
|
34
|
+
logger.debug(f"CreateOnlineFeatureTable headers: {headers}")
|
|
30
35
|
self._client._apiVersion = "2021-08-20"
|
|
31
36
|
body = self._client.call("CreateOnlineFeatureTable", params, headers=headers)
|
|
32
37
|
response = json.loads(body)
|
|
33
38
|
model = models.CreateOnlineFeatureTableResponse()
|
|
34
39
|
model._deserialize(response["Response"])
|
|
35
|
-
|
|
40
|
+
logger.debug(f"CreateOnlineFeatureTable Response: {response}")
|
|
36
41
|
return model
|
|
37
42
|
except Exception as e:
|
|
38
43
|
if isinstance(e, TencentCloudSDKException):
|
|
@@ -49,17 +54,22 @@ class FeatureCloudSDK:
|
|
|
49
54
|
Returns:
|
|
50
55
|
查询结果响应
|
|
51
56
|
"""
|
|
57
|
+
logger = get_logger()
|
|
58
|
+
if is_mock():
|
|
59
|
+
logger.debug("Mock DescribeNormalSchedulerExecutorGroups API")
|
|
60
|
+
return models.DescribeNormalSchedulerExecutorGroupsResponse()
|
|
61
|
+
|
|
52
62
|
try:
|
|
53
63
|
params = request._serialize()
|
|
54
64
|
headers = set_request_header(request.headers)
|
|
55
|
-
|
|
56
|
-
|
|
65
|
+
logger.debug(f"DescribeNormalSchedulerExecutorGroups params: {params}")
|
|
66
|
+
logger.debug(f"DescribeNormalSchedulerExecutorGroups headers: {headers}")
|
|
57
67
|
self._client._apiVersion = "2021-08-20"
|
|
58
68
|
body = self._client.call("DescribeNormalSchedulerExecutorGroups", params, headers=headers)
|
|
59
69
|
response = json.loads(body)
|
|
60
70
|
model = models.DescribeNormalSchedulerExecutorGroupsResponse()
|
|
61
71
|
model._deserialize(response["Response"])
|
|
62
|
-
|
|
72
|
+
logger.debug(f"DescribeNormalSchedulerExecutorGroups Response: {response}")
|
|
63
73
|
return model
|
|
64
74
|
except Exception as e:
|
|
65
75
|
if isinstance(e, TencentCloudSDKException):
|
|
@@ -75,20 +85,54 @@ class FeatureCloudSDK:
|
|
|
75
85
|
Returns:
|
|
76
86
|
刷新结果响应
|
|
77
87
|
"""
|
|
88
|
+
logger = get_logger()
|
|
89
|
+
if is_mock():
|
|
90
|
+
logger.debug("Mock RefreshFeatureTable API")
|
|
91
|
+
return models.RefreshFeatureTableResponse()
|
|
78
92
|
try:
|
|
79
93
|
params = request._serialize()
|
|
80
94
|
headers = set_request_header(request.headers)
|
|
81
|
-
|
|
82
|
-
|
|
95
|
+
logger.debug(f"RefreshFeatureTable params: {params}")
|
|
96
|
+
logger.debug(f"RefreshFeatureTable headers: {headers}")
|
|
83
97
|
self._client_v2._apiVersion = "2025-08-06"
|
|
84
98
|
body = self._client_v2.call("RefreshFeatureTable", params, headers=headers)
|
|
85
99
|
response = json.loads(body)
|
|
86
100
|
model = models.RefreshFeatureTableResponse()
|
|
87
101
|
model._deserialize(response["Response"])
|
|
88
|
-
|
|
102
|
+
logger.debug(f"RefreshFeatureTable Response: {response}")
|
|
103
|
+
return model
|
|
104
|
+
except Exception as e:
|
|
105
|
+
if isinstance(e, TencentCloudSDKException):
|
|
106
|
+
raise
|
|
107
|
+
else:
|
|
108
|
+
raise TencentCloudSDKException(type(e).__name__, str(e))
|
|
109
|
+
|
|
110
|
+
def DescribeFeatureStoreDatabases(self, request: models.DescribeFeatureStoreDatabasesRequest) -> 'models.DescribeFeatureStoreDatabasesResponse':
|
|
111
|
+
"""
|
|
112
|
+
查询特征库列表
|
|
113
|
+
Args:
|
|
114
|
+
request: 查询请求参数
|
|
115
|
+
Returns:
|
|
116
|
+
查询结果响应
|
|
117
|
+
"""
|
|
118
|
+
logger = get_logger()
|
|
119
|
+
if is_mock():
|
|
120
|
+
logger.debug("Mock DescribeFeatureStoreDatabases API")
|
|
121
|
+
return models.DescribeFeatureStoreDatabasesResponse()
|
|
122
|
+
try:
|
|
123
|
+
params = request._serialize()
|
|
124
|
+
headers = set_request_header(request.headers)
|
|
125
|
+
logger.debug(f"DescribeFeatureStoreDatabases params: {params}")
|
|
126
|
+
logger.debug(f"DescribeFeatureStoreDatabases headers: {headers}")
|
|
127
|
+
self._client_v2._apiVersion = "2021-08-20"
|
|
128
|
+
body = self._client_v2.call("DescribeFeatureStoreDatabases", params, headers=headers)
|
|
129
|
+
response = json.loads(body)
|
|
130
|
+
model = models.DescribeFeatureStoreDatabasesResponse()
|
|
131
|
+
model._deserialize(response["Response"])
|
|
132
|
+
logger.debug(f"DescribeFeatureStoreDatabases Response: {response}")
|
|
89
133
|
return model
|
|
90
134
|
except Exception as e:
|
|
91
135
|
if isinstance(e, TencentCloudSDKException):
|
|
92
136
|
raise
|
|
93
137
|
else:
|
|
94
|
-
raise TencentCloudSDKException(type(e).__name__, str(e))
|
|
138
|
+
raise TencentCloudSDKException(type(e).__name__, str(e))
|
|
@@ -1,5 +1,8 @@
|
|
|
1
|
+
|
|
1
2
|
from tencentcloud.common.abstract_model import AbstractModel
|
|
3
|
+
import typing
|
|
2
4
|
import warnings
|
|
5
|
+
from wedata.feature_store.cloud_sdk_client.utils import is_warning
|
|
3
6
|
|
|
4
7
|
|
|
5
8
|
class OfflineFeatureConfiguration(AbstractModel):
|
|
@@ -85,8 +88,8 @@ class OfflineFeatureConfiguration(AbstractModel):
|
|
|
85
88
|
property_name = name[1:]
|
|
86
89
|
if property_name in member_set:
|
|
87
90
|
member_set.remove(property_name)
|
|
88
|
-
if len(member_set) > 0:
|
|
89
|
-
warnings.warn("%s
|
|
91
|
+
if len(member_set) > 0 and is_warning():
|
|
92
|
+
warnings.warn("%s fields are useless." % ",".join(member_set))
|
|
90
93
|
|
|
91
94
|
|
|
92
95
|
class TaskSchedulerConfiguration(AbstractModel):
|
|
@@ -190,8 +193,8 @@ class TaskSchedulerConfiguration(AbstractModel):
|
|
|
190
193
|
property_name = name[1:]
|
|
191
194
|
if property_name in member_set:
|
|
192
195
|
member_set.remove(property_name)
|
|
193
|
-
if len(member_set) > 0:
|
|
194
|
-
warnings.warn("%s
|
|
196
|
+
if len(member_set) > 0 and is_warning():
|
|
197
|
+
warnings.warn("%s fields are useless." % ",".join(member_set))
|
|
195
198
|
|
|
196
199
|
|
|
197
200
|
class OnlineFeatureConfiguration(AbstractModel):
|
|
@@ -257,8 +260,8 @@ class OnlineFeatureConfiguration(AbstractModel):
|
|
|
257
260
|
property_name = name[1:]
|
|
258
261
|
if property_name in member_set:
|
|
259
262
|
member_set.remove(property_name)
|
|
260
|
-
if len(member_set) > 0:
|
|
261
|
-
warnings.warn("%s
|
|
263
|
+
if len(member_set) > 0 and is_warning():
|
|
264
|
+
warnings.warn("%s fields are useless." % ",".join(member_set))
|
|
262
265
|
|
|
263
266
|
|
|
264
267
|
class CreateOnlineFeatureTableRequest(AbstractModel):
|
|
@@ -337,8 +340,8 @@ class CreateOnlineFeatureTableRequest(AbstractModel):
|
|
|
337
340
|
property_name = name[1:]
|
|
338
341
|
if property_name in member_set:
|
|
339
342
|
member_set.remove(property_name)
|
|
340
|
-
if len(member_set) > 0:
|
|
341
|
-
warnings.warn("%s
|
|
343
|
+
if len(member_set) > 0 and is_warning():
|
|
344
|
+
warnings.warn("%s fields are useless." % ",".join(member_set))
|
|
342
345
|
|
|
343
346
|
|
|
344
347
|
class CreateOnlineFeatureTableRsp(AbstractModel):
|
|
@@ -374,8 +377,8 @@ class CreateOnlineFeatureTableRsp(AbstractModel):
|
|
|
374
377
|
property_name = name[1:]
|
|
375
378
|
if property_name in member_set:
|
|
376
379
|
member_set.remove(property_name)
|
|
377
|
-
if len(member_set) > 0:
|
|
378
|
-
warnings.warn("%s
|
|
380
|
+
if len(member_set) > 0 and is_warning():
|
|
381
|
+
warnings.warn("%s fields are useless." % ",".join(member_set))
|
|
379
382
|
|
|
380
383
|
|
|
381
384
|
class CreateOnlineFeatureTableResponse(AbstractModel):
|
|
@@ -402,8 +405,8 @@ class CreateOnlineFeatureTableResponse(AbstractModel):
|
|
|
402
405
|
property_name = name[1:]
|
|
403
406
|
if property_name in member_set:
|
|
404
407
|
member_set.remove(property_name)
|
|
405
|
-
if len(member_set) > 0:
|
|
406
|
-
warnings.warn("%s
|
|
408
|
+
if len(member_set) > 0 and is_warning():
|
|
409
|
+
warnings.warn("%s fields are useless." % ",".join(member_set))
|
|
407
410
|
|
|
408
411
|
|
|
409
412
|
class DescribeNormalSchedulerExecutorGroupsData(AbstractModel):
|
|
@@ -480,12 +483,12 @@ class DescribeNormalSchedulerExecutorGroupsData(AbstractModel):
|
|
|
480
483
|
self._Available = params.get("Available")
|
|
481
484
|
self._PythonSubVersions = params.get("PythonSubVersions")
|
|
482
485
|
self._EnvJson = params.get("EnvJson")
|
|
483
|
-
|
|
486
|
+
member_set = set(params.keys())
|
|
484
487
|
for name, value in vars(self).items():
|
|
485
|
-
if name in
|
|
486
|
-
|
|
487
|
-
if len(
|
|
488
|
-
warnings.warn("%s
|
|
488
|
+
if name in member_set:
|
|
489
|
+
member_set.remove(name)
|
|
490
|
+
if len(member_set) > 0 and is_warning():
|
|
491
|
+
warnings.warn("%s fields are useless." % ",".join(member_set))
|
|
489
492
|
|
|
490
493
|
|
|
491
494
|
class DescribeNormalSchedulerExecutorGroupsResponse(AbstractModel):
|
|
@@ -511,12 +514,12 @@ class DescribeNormalSchedulerExecutorGroupsResponse(AbstractModel):
|
|
|
511
514
|
obj = DescribeNormalSchedulerExecutorGroupsData()
|
|
512
515
|
obj._deserialize(item)
|
|
513
516
|
self._Data.append(obj)
|
|
514
|
-
|
|
517
|
+
member_set = set(params.keys())
|
|
515
518
|
for name, value in vars(self).items():
|
|
516
|
-
if name in
|
|
517
|
-
|
|
518
|
-
if len(
|
|
519
|
-
warnings.warn("%s
|
|
519
|
+
if name in member_set:
|
|
520
|
+
member_set.remove(name)
|
|
521
|
+
if len(member_set) > 0 and is_warning():
|
|
522
|
+
warnings.warn("%s fields are useless." % ",".join(member_set))
|
|
520
523
|
|
|
521
524
|
|
|
522
525
|
class DescribeNormalSchedulerExecutorGroupsRequest(AbstractModel):
|
|
@@ -547,12 +550,12 @@ class DescribeNormalSchedulerExecutorGroupsRequest(AbstractModel):
|
|
|
547
550
|
def _deserialize(self, params):
|
|
548
551
|
self._ProjectId = params.get("ProjectId")
|
|
549
552
|
self._OnlyAvailable = params.get("OnlyAvailable")
|
|
550
|
-
|
|
553
|
+
member_set = set(params.keys())
|
|
551
554
|
for name, value in vars(self).items():
|
|
552
|
-
if name in
|
|
553
|
-
|
|
554
|
-
if len(
|
|
555
|
-
warnings.warn("%s
|
|
555
|
+
if name in member_set:
|
|
556
|
+
member_set.remove(name)
|
|
557
|
+
if len(member_set) > 0 and is_warning():
|
|
558
|
+
warnings.warn("%s fields are useless." % ",".join(member_set))
|
|
556
559
|
|
|
557
560
|
|
|
558
561
|
class RefreshFeatureTableRequest(AbstractModel):
|
|
@@ -651,12 +654,12 @@ class RefreshFeatureTableRequest(AbstractModel):
|
|
|
651
654
|
self._DatasourceType = params.get("DatasourceType")
|
|
652
655
|
self._EngineName = params.get("EngineName")
|
|
653
656
|
self._IsTry = params.get("IsTry")
|
|
654
|
-
|
|
657
|
+
member_set = set(params.keys())
|
|
655
658
|
for name, value in vars(self).items():
|
|
656
|
-
if name in
|
|
657
|
-
|
|
658
|
-
if len(
|
|
659
|
-
warnings.warn("%s
|
|
659
|
+
if name in member_set:
|
|
660
|
+
member_set.remove(name)
|
|
661
|
+
if len(member_set) > 0 and is_warning():
|
|
662
|
+
warnings.warn("%s fields are useless." % ",".join(member_set))
|
|
660
663
|
|
|
661
664
|
|
|
662
665
|
class RefreshFeatureTableResponse(AbstractModel):
|
|
@@ -678,9 +681,181 @@ class RefreshFeatureTableResponse(AbstractModel):
|
|
|
678
681
|
|
|
679
682
|
def _deserialize(self, params):
|
|
680
683
|
self._Data = params.get("Data")
|
|
681
|
-
|
|
684
|
+
member_set = set(params.keys())
|
|
685
|
+
for name, value in vars(self).items():
|
|
686
|
+
if name in member_set:
|
|
687
|
+
member_set.remove(name)
|
|
688
|
+
if len(member_set) > 0 and is_warning():
|
|
689
|
+
warnings.warn("%s fields are useless." % ",".join(member_set))
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
class FeatureStoreDatabase(AbstractModel):
|
|
693
|
+
"""
|
|
694
|
+
特征存储库
|
|
695
|
+
Property:
|
|
696
|
+
DatabaseName: 特征库名称
|
|
697
|
+
DatasourceType:数据源类型: EMR/DLC
|
|
698
|
+
EngineName: 引擎名称
|
|
699
|
+
ProjectId: 项目ID
|
|
700
|
+
IsDefault: 是否默认库
|
|
701
|
+
IsExistDatabase: 是否存在库
|
|
702
|
+
DatasourceId: 数据源ID
|
|
703
|
+
OnlineMode: 在线模式: 0-离线; 1-在线
|
|
704
|
+
DatasourceName: 数据源名称
|
|
705
|
+
"""
|
|
706
|
+
def __init__(self):
|
|
707
|
+
self._DatabaseName = None
|
|
708
|
+
self._DatasourceType = None
|
|
709
|
+
self._EngineName = None
|
|
710
|
+
self._ProjectId = None
|
|
711
|
+
self._IsDefault = None
|
|
712
|
+
self._IsExistDatabase = None
|
|
713
|
+
self._DatasourceId = None
|
|
714
|
+
self._OnlineMode = None
|
|
715
|
+
self._DatasourceName = None
|
|
716
|
+
|
|
717
|
+
@property
|
|
718
|
+
def DatabaseName(self):
|
|
719
|
+
return self._DatabaseName
|
|
720
|
+
|
|
721
|
+
@DatabaseName.setter
|
|
722
|
+
def DatabaseName(self, DatabaseName):
|
|
723
|
+
self._DatabaseName = DatabaseName
|
|
724
|
+
|
|
725
|
+
@property
|
|
726
|
+
def DatasourceType(self):
|
|
727
|
+
return self._DatasourceType
|
|
728
|
+
|
|
729
|
+
@DatasourceType.setter
|
|
730
|
+
def DatasourceType(self, DatasourceType):
|
|
731
|
+
self._DatasourceType = DatasourceType
|
|
732
|
+
|
|
733
|
+
@property
|
|
734
|
+
def EngineName(self):
|
|
735
|
+
return self._EngineName
|
|
736
|
+
|
|
737
|
+
@EngineName.setter
|
|
738
|
+
def EngineName(self, EngineName):
|
|
739
|
+
self._EngineName = EngineName
|
|
740
|
+
|
|
741
|
+
@property
|
|
742
|
+
def ProjectId(self):
|
|
743
|
+
return self._ProjectId
|
|
744
|
+
|
|
745
|
+
@ProjectId.setter
|
|
746
|
+
def ProjectId(self, ProjectId):
|
|
747
|
+
self._ProjectId = ProjectId
|
|
748
|
+
|
|
749
|
+
@property
|
|
750
|
+
def IsDefault(self):
|
|
751
|
+
return self._IsDefault
|
|
752
|
+
|
|
753
|
+
@IsDefault.setter
|
|
754
|
+
def IsDefault(self, IsDefault):
|
|
755
|
+
self._IsDefault = IsDefault
|
|
756
|
+
|
|
757
|
+
@property
|
|
758
|
+
def IsExistDatabase(self):
|
|
759
|
+
return self._IsExistDatabase
|
|
760
|
+
|
|
761
|
+
@IsExistDatabase.setter
|
|
762
|
+
def IsExistDatabase(self, IsExistDatabase):
|
|
763
|
+
self._IsExistDatabase = IsExistDatabase
|
|
764
|
+
|
|
765
|
+
@property
|
|
766
|
+
def DatasourceId(self):
|
|
767
|
+
return self._DatasourceId
|
|
768
|
+
|
|
769
|
+
@DatasourceId.setter
|
|
770
|
+
def DatasourceId(self, DatasourceId):
|
|
771
|
+
self._DatasourceId = DatasourceId
|
|
772
|
+
|
|
773
|
+
@property
|
|
774
|
+
def OnlineMode(self):
|
|
775
|
+
return self._OnlineMode
|
|
776
|
+
|
|
777
|
+
@OnlineMode.setter
|
|
778
|
+
def OnlineMode(self, OnlineMode):
|
|
779
|
+
self._OnlineMode = OnlineMode
|
|
780
|
+
|
|
781
|
+
@property
|
|
782
|
+
def DatasourceName(self):
|
|
783
|
+
return self._DatasourceName
|
|
784
|
+
|
|
785
|
+
@DatasourceName.setter
|
|
786
|
+
def DatasourceName(self, DatasourceName):
|
|
787
|
+
self._DatasourceName = DatasourceName
|
|
788
|
+
|
|
789
|
+
def _deserialize(self, params):
|
|
790
|
+
self._DatabaseName = params.get("DatabaseName")
|
|
791
|
+
self._DatasourceType = params.get("DatasourceType")
|
|
792
|
+
self._EngineName = params.get("EngineName")
|
|
793
|
+
self._ProjectId = params.get("ProjectId")
|
|
794
|
+
self._IsDefault = params.get("IsDefault")
|
|
795
|
+
self._IsExistDatabase = params.get("IsExistDatabase")
|
|
796
|
+
self._DatasourceId = params.get("DatasourceId")
|
|
797
|
+
self._OnlineMode = params.get("OnlineMode")
|
|
798
|
+
self._DatasourceName = params.get("DatasourceName")
|
|
799
|
+
member_set = set(params.keys())
|
|
800
|
+
for name, value in vars(self).items():
|
|
801
|
+
if name in member_set:
|
|
802
|
+
member_set.remove(name)
|
|
803
|
+
if len(member_set) > 0 and is_warning():
|
|
804
|
+
warnings.warn("%s fields are useless." % ",".join(member_set))
|
|
805
|
+
|
|
806
|
+
|
|
807
|
+
class DescribeFeatureStoreDatabasesResponse(AbstractModel):
|
|
808
|
+
"""
|
|
809
|
+
描述特征库
|
|
810
|
+
Property:
|
|
811
|
+
Data: 结果
|
|
812
|
+
"""
|
|
813
|
+
def __init__(self):
|
|
814
|
+
self._Data = None
|
|
815
|
+
|
|
816
|
+
@property
|
|
817
|
+
def Data(self) -> typing.List[FeatureStoreDatabase]:
|
|
818
|
+
return self._Data
|
|
819
|
+
|
|
820
|
+
@Data.setter
|
|
821
|
+
def Data(self, Data):
|
|
822
|
+
self._Data = Data
|
|
823
|
+
|
|
824
|
+
def _deserialize(self, params):
|
|
825
|
+
self._Data = []
|
|
826
|
+
for item in params.get("Data", []):
|
|
827
|
+
obj = FeatureStoreDatabase()
|
|
828
|
+
obj._deserialize(item)
|
|
829
|
+
self._Data.append(obj)
|
|
830
|
+
member_set = set(params.keys())
|
|
831
|
+
for name, value in vars(self).items():
|
|
832
|
+
if name in member_set:
|
|
833
|
+
member_set.remove(name)
|
|
834
|
+
if len(member_set) > 0 and is_warning():
|
|
835
|
+
warnings.warn("%s fields are useless." % ",".join(member_set))
|
|
836
|
+
|
|
837
|
+
|
|
838
|
+
class DescribeFeatureStoreDatabasesRequest(AbstractModel):
|
|
839
|
+
"""
|
|
840
|
+
Property:
|
|
841
|
+
ProjectId: 项目ID
|
|
842
|
+
"""
|
|
843
|
+
def __init__(self):
|
|
844
|
+
self._ProjectId = None
|
|
845
|
+
|
|
846
|
+
@property
|
|
847
|
+
def ProjectId(self):
|
|
848
|
+
return self._ProjectId
|
|
849
|
+
|
|
850
|
+
@ProjectId.setter
|
|
851
|
+
def ProjectId(self, ProjectId):
|
|
852
|
+
self._ProjectId = ProjectId
|
|
853
|
+
|
|
854
|
+
def _deserialize(self, params):
|
|
855
|
+
self._ProjectId = params.get("ProjectId")
|
|
856
|
+
member_set = set(params.keys())
|
|
682
857
|
for name, value in vars(self).items():
|
|
683
|
-
if name in
|
|
684
|
-
|
|
685
|
-
if len(
|
|
686
|
-
warnings.warn("%s
|
|
858
|
+
if name in member_set:
|
|
859
|
+
member_set.remove(name)
|
|
860
|
+
if len(member_set) > 0 and is_warning():
|
|
861
|
+
warnings.warn("%s fields are useless." % ",".join(member_set))
|
|
@@ -30,3 +30,17 @@ def set_request_header(headers):
|
|
|
30
30
|
headers["X-Qcloud-User-Id"] = os.environ.get("TEST_USER_ID")
|
|
31
31
|
return headers
|
|
32
32
|
|
|
33
|
+
|
|
34
|
+
def is_mock() -> bool:
|
|
35
|
+
"""
|
|
36
|
+
是否为模拟环境
|
|
37
|
+
"""
|
|
38
|
+
return os.getenv("IS_MOCK_API") == "true"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def is_warning() -> bool:
|
|
42
|
+
"""
|
|
43
|
+
是否展示警告环境
|
|
44
|
+
"""
|
|
45
|
+
return os.getenv("IS_CLOUD_API_WARNING") == "true"
|
|
46
|
+
|
|
File without changes
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class LoggerSingleton:
|
|
6
|
+
_instance = None
|
|
7
|
+
|
|
8
|
+
def __new__(cls):
|
|
9
|
+
if cls._instance is None:
|
|
10
|
+
cls._instance = super().__new__(cls)
|
|
11
|
+
cls._instance._initialize_logger()
|
|
12
|
+
return cls._instance
|
|
13
|
+
|
|
14
|
+
def _initialize_logger(self):
|
|
15
|
+
self.logger = logging.getLogger("wedata-feature-engineering")
|
|
16
|
+
self.logger.setLevel(logging.INFO)
|
|
17
|
+
|
|
18
|
+
# 清除已有的handler,避免重复添加
|
|
19
|
+
if self.logger.handlers:
|
|
20
|
+
self.logger.handlers.clear()
|
|
21
|
+
|
|
22
|
+
# 创建formatter,包含时间、文件名和行号
|
|
23
|
+
formatter = logging.Formatter(
|
|
24
|
+
fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
|
25
|
+
datefmt='%Y-%m-%d %H:%M:%S'
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# 创建handler并输出到stdout
|
|
29
|
+
handler = logging.StreamHandler(sys.stdout)
|
|
30
|
+
handler.setFormatter(formatter)
|
|
31
|
+
|
|
32
|
+
self.logger.addHandler(handler)
|
|
33
|
+
|
|
34
|
+
# 防止消息传播到父级logger
|
|
35
|
+
self.logger.propagate = False
|
|
36
|
+
|
|
37
|
+
def get_logger(self, level=logging.INFO):
|
|
38
|
+
self.logger.setLevel(level)
|
|
39
|
+
return self.logger
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_logger(level=logging.INFO):
|
|
43
|
+
"""获取单例logger实例"""
|
|
44
|
+
return LoggerSingleton().get_logger(level)
|
|
@@ -43,7 +43,7 @@ ML_MODEL = "MLmodel"
|
|
|
43
43
|
FEATURE_LOOKUP_CLIENT_PIP_PACKAGE = "tencent-wedata-feature-engineering-dev"
|
|
44
44
|
|
|
45
45
|
# 特征查找版本号
|
|
46
|
-
FEATURE_LOOKUP_CLIENT_MAJOR_VERSION = "0.
|
|
46
|
+
FEATURE_LOOKUP_CLIENT_MAJOR_VERSION = "0.2.2"
|
|
47
47
|
|
|
48
48
|
# 特征存储内部数据目录
|
|
49
49
|
FEATURE_STORE_INTERNAL_DATA_DIR = "_wedata_internal/"
|
|
@@ -15,14 +15,13 @@ from wedata.feature_store.constants.constants import (
|
|
|
15
15
|
FEATURE_TABLE_VALUE, FEATURE_TABLE_PROJECT, FEATURE_TABLE_TIMESTAMP,
|
|
16
16
|
FEATURE_TABLE_BACKUP_PRIMARY_KEY, FEATURE_DLC_TABLE_PRIMARY_KEY)
|
|
17
17
|
from wedata.feature_store.constants.engine_types import EngineTypes
|
|
18
|
+
from wedata.feature_store.common.log.logger import get_logger
|
|
18
19
|
from wedata.feature_store.common.store_config.redis import RedisStoreConfig
|
|
19
20
|
from wedata.feature_store.entities.feature_table import FeatureTable
|
|
20
21
|
from wedata.feature_store.spark_client.spark_client import SparkClient
|
|
21
22
|
from wedata.feature_store.utils import common_utils, env_utils
|
|
22
23
|
from wedata.feature_store.feast_client.feast_client import FeastClient
|
|
23
|
-
from wedata.feature_store.cloud_sdk_client
|
|
24
|
-
TaskSchedulerConfiguration, OnlineFeatureConfiguration, OfflineFeatureConfiguration,
|
|
25
|
-
CreateOnlineFeatureTableRequest, DescribeNormalSchedulerExecutorGroupsRequest, RefreshFeatureTableRequest)
|
|
24
|
+
from wedata.feature_store.cloud_sdk_client import models
|
|
26
25
|
from wedata.feature_store.cloud_sdk_client.client import FeatureCloudSDK
|
|
27
26
|
|
|
28
27
|
|
|
@@ -44,6 +43,11 @@ class FeatureTableClient:
|
|
|
44
43
|
self.__cloud_secret_id, self.__cloud_secret_key = env_utils.get_cloud_secret()
|
|
45
44
|
self.__project = env_utils.get_project_id()
|
|
46
45
|
self.__region = env_utils.get_region()
|
|
46
|
+
self.__logger = get_logger()
|
|
47
|
+
default_online_table = self._get_offline_default_database()
|
|
48
|
+
if default_online_table:
|
|
49
|
+
env_utils.set_default_database(default_online_table.DatabaseName)
|
|
50
|
+
|
|
47
51
|
|
|
48
52
|
@property
|
|
49
53
|
def cloud_secret_id(self) -> str:
|
|
@@ -310,7 +314,7 @@ class FeatureTableClient:
|
|
|
310
314
|
raise ValueError(f"Engine type {engine_type} is not supported")
|
|
311
315
|
|
|
312
316
|
# 打印sql
|
|
313
|
-
|
|
317
|
+
self.__logger.info(f"create table ddl: {ddl}\n")
|
|
314
318
|
|
|
315
319
|
# 执行DDL
|
|
316
320
|
try:
|
|
@@ -320,8 +324,6 @@ class FeatureTableClient:
|
|
|
320
324
|
except Exception as e:
|
|
321
325
|
raise ValueError(f"Failed to create table: {str(e)}") from e
|
|
322
326
|
|
|
323
|
-
print("async table info to feast")
|
|
324
|
-
|
|
325
327
|
self._feast_client.create_table(
|
|
326
328
|
table_name=table_name,
|
|
327
329
|
primary_keys=primary_keys,
|
|
@@ -332,7 +334,7 @@ class FeatureTableClient:
|
|
|
332
334
|
description=description
|
|
333
335
|
)
|
|
334
336
|
|
|
335
|
-
|
|
337
|
+
self.__logger.info(f"Table '{name}' created successfully. Starting web synchronization.")
|
|
336
338
|
|
|
337
339
|
try:
|
|
338
340
|
self._sync_table_info(table_name=name, action_name="create",
|
|
@@ -340,7 +342,8 @@ class FeatureTableClient:
|
|
|
340
342
|
data_source_name=data_source_name, engine_name=env_utils.get_engine_name(),
|
|
341
343
|
is_try=False)
|
|
342
344
|
except tencentcloud.common.exception.TencentCloudSDKException as e:
|
|
343
|
-
raise RuntimeError(f"
|
|
345
|
+
raise RuntimeError(f"Failed to synchronize web data for table '{name}'. "
|
|
346
|
+
f"Please manually operate on the web page. Error: {str(e)}")
|
|
344
347
|
|
|
345
348
|
# 构建并返回FeatureTable对象
|
|
346
349
|
return FeatureTable(
|
|
@@ -533,21 +536,22 @@ class FeatureTableClient:
|
|
|
533
536
|
raise RuntimeError(f"Table '{name}' is can't create. {str(e)}")
|
|
534
537
|
|
|
535
538
|
# 执行修改
|
|
536
|
-
|
|
539
|
+
self.__logger.info(f"alter table sql: \n {alter_sql}")
|
|
537
540
|
self._spark.sql(alter_sql)
|
|
538
|
-
|
|
541
|
+
self.__logger.debug("Execute sql done, start sync table info to feast")
|
|
539
542
|
self._feast_client.alter_table(full_table_name=table_name, primary_keys=primary_keys,
|
|
540
543
|
timestamp_key=timestamp_key)
|
|
541
|
-
|
|
544
|
+
self.__logger.info(f"Successfully register table '{table_name}'. Starting web synchronization.")
|
|
542
545
|
|
|
543
546
|
try:
|
|
544
547
|
self._sync_table_info(table_name=name, action_name="create",
|
|
545
548
|
database_name=env_utils.get_database_name(database_name),
|
|
546
549
|
data_source_name=data_source_name, engine_name=env_utils.get_engine_name(), is_try=False)
|
|
547
550
|
except tencentcloud.common.exception.TencentCloudSDKException as e:
|
|
548
|
-
raise RuntimeError(f"
|
|
549
|
-
|
|
550
|
-
|
|
551
|
+
raise RuntimeError(f"Failed to synchronize web data for table '{name}'. "
|
|
552
|
+
f"Please manually operate on the web page. Error: {str(e)}")
|
|
553
|
+
except (ValueError, RuntimeError):
|
|
554
|
+
raise
|
|
551
555
|
except Exception as e:
|
|
552
556
|
raise RuntimeError(f"Failed to modify properties for table '{table_name}': {str(e)}") from e
|
|
553
557
|
|
|
@@ -624,13 +628,14 @@ class FeatureTableClient:
|
|
|
624
628
|
try:
|
|
625
629
|
# 检查表是否存在
|
|
626
630
|
if not self._check_table_exists(table_name):
|
|
627
|
-
|
|
631
|
+
self.__logger.error(f"Table '{name}' does not exist")
|
|
628
632
|
return
|
|
629
633
|
|
|
630
634
|
try:
|
|
631
635
|
feature_view = self._feast_client.get_feature_view(table_name)
|
|
632
636
|
except Exception as e:
|
|
633
|
-
|
|
637
|
+
pass
|
|
638
|
+
# self.__logger.warning(f"Table '{name}' is not a feature table, skip delete. {str(e)}")
|
|
634
639
|
else:
|
|
635
640
|
if feature_view.online:
|
|
636
641
|
raise ValueError(f"Table '{name}' has a online table, please call drop_online_table first")
|
|
@@ -643,14 +648,14 @@ class FeatureTableClient:
|
|
|
643
648
|
|
|
644
649
|
# 执行删除
|
|
645
650
|
self._spark.sql(f"DROP TABLE {table_name}")
|
|
646
|
-
|
|
651
|
+
self.__logger.info(f"Table '{name}' dropped")
|
|
647
652
|
try:
|
|
648
653
|
self._feast_client.remove_offline_table(table_name=table_name)
|
|
649
654
|
except Exception as e:
|
|
650
655
|
raise
|
|
651
656
|
# raise ValueError(f"Failed to delete table '{name}' in feast: {str(e)}")
|
|
652
657
|
else:
|
|
653
|
-
|
|
658
|
+
self.__logger.info(f"Table '{name}' removed from feast")
|
|
654
659
|
|
|
655
660
|
try:
|
|
656
661
|
self._sync_table_info(table_name=name, action_name="delete",
|
|
@@ -706,7 +711,7 @@ class FeatureTableClient:
|
|
|
706
711
|
schema_name_list = [field.name for field in tmp_schema_list]
|
|
707
712
|
schema = StructType(tmp_schema_list)
|
|
708
713
|
for field in schema:
|
|
709
|
-
|
|
714
|
+
self.__logger.debug(f"translate {field.name} to feast Type: {field.dataType}")
|
|
710
715
|
|
|
711
716
|
feast_client = FeastClient(offline_store=self._spark, online_store_config=online_config)
|
|
712
717
|
# 构建离线表的entity的数据过滤
|
|
@@ -721,7 +726,7 @@ class FeatureTableClient:
|
|
|
721
726
|
full_table_name=full_table_name,
|
|
722
727
|
columns_name=columns_name_list,
|
|
723
728
|
entity_rows=[result_row.asDict()])
|
|
724
|
-
|
|
729
|
+
self.__logger.debug(f"=====>read online dataframe:\n{online_view[schema_name_list]}")
|
|
725
730
|
return self._spark.createDataFrame(online_view[schema_name_list], schema=schema, verifySchema=False)
|
|
726
731
|
else:
|
|
727
732
|
return self._spark.createDataFrame([])
|
|
@@ -730,7 +735,7 @@ class FeatureTableClient:
|
|
|
730
735
|
full_table_name=full_table_name,
|
|
731
736
|
columns_name=columns_name_list,
|
|
732
737
|
entity_rows=entity_row)
|
|
733
|
-
|
|
738
|
+
self.__logger.debug(f"=====>read online dataframe:\n{online_view[schema_name_list]}")
|
|
734
739
|
return self._spark.createDataFrame(online_view[schema_name_list], schema=schema, verifySchema=False)
|
|
735
740
|
|
|
736
741
|
def get_table(
|
|
@@ -827,9 +832,9 @@ class FeatureTableClient:
|
|
|
827
832
|
except Exception as e:
|
|
828
833
|
raise RuntimeError(f"Failed to modify properties for table '{name}': {str(e)}") from e
|
|
829
834
|
|
|
830
|
-
def publish_table(self, table_name: str, data_source_name: str,
|
|
835
|
+
def publish_table(self, table_name: str, data_source_name: str,
|
|
831
836
|
database_name: Optional[str] = None,
|
|
832
|
-
is_cycle: bool = False, cycle_obj: TaskSchedulerConfiguration = None,
|
|
837
|
+
is_cycle: bool = False, cycle_obj: models.TaskSchedulerConfiguration = None,
|
|
833
838
|
is_use_default_online: bool = True, online_config: RedisStoreConfig = None):
|
|
834
839
|
"""
|
|
835
840
|
将离线特征表发布为在线特征表
|
|
@@ -852,46 +857,43 @@ class FeatureTableClient:
|
|
|
852
857
|
# 检查是否已经发布(根据feast中feature view的online标记判断)
|
|
853
858
|
try:
|
|
854
859
|
# 获取该表在feast中的feature view
|
|
855
|
-
online_data = self.
|
|
856
|
-
table_name=table_name,
|
|
857
|
-
database_name=database_name,
|
|
858
|
-
online_config=online_config)
|
|
860
|
+
online_data = self._feast_client.get_feature_view(full_table_name)
|
|
859
861
|
except Exception as e:
|
|
860
862
|
print(f"Failed to get online table view for table '{full_table_name}': {str(e)}")
|
|
861
863
|
else:
|
|
862
|
-
if online_data:
|
|
864
|
+
if online_data.online:
|
|
863
865
|
raise ValueError(f"Table '{full_table_name}' has already been published")
|
|
864
866
|
|
|
865
867
|
# 配置周期性参数
|
|
866
868
|
if is_cycle:
|
|
867
|
-
if not isinstance(cycle_obj, TaskSchedulerConfiguration):
|
|
869
|
+
if not isinstance(cycle_obj, models.TaskSchedulerConfiguration):
|
|
868
870
|
raise ValueError("cycle_obj must be a TaskSchedulerConfiguration object when is_cycle is True")
|
|
869
871
|
|
|
870
872
|
cycle_obj.CycleType = "CRONTAB_CYCLE"
|
|
871
873
|
else:
|
|
872
|
-
if isinstance(cycle_obj, TaskSchedulerConfiguration):
|
|
874
|
+
if isinstance(cycle_obj, models.TaskSchedulerConfiguration):
|
|
873
875
|
cycle_obj.CycleType = "ONEOFF_CYCLE"
|
|
874
876
|
else:
|
|
875
|
-
cycle_obj = TaskSchedulerConfiguration()
|
|
877
|
+
cycle_obj = models.TaskSchedulerConfiguration()
|
|
876
878
|
cycle_obj.CycleType = "ONEOFF_CYCLE"
|
|
877
879
|
# 设置默认当前时间延后3分钟
|
|
878
880
|
cycle_obj.CrontabExpression = (datetime.datetime.now() + datetime.timedelta(minutes=3)).strftime(
|
|
879
881
|
"%M %H %d %m %w ? %y")
|
|
880
882
|
|
|
881
883
|
if is_use_default_online:
|
|
882
|
-
online_feature_config = OnlineFeatureConfiguration()
|
|
884
|
+
online_feature_config = models.OnlineFeatureConfiguration()
|
|
883
885
|
online_feature_config.UserDefault = True
|
|
884
886
|
else:
|
|
885
887
|
if not isinstance(online_config, RedisStoreConfig):
|
|
886
888
|
raise ValueError("online_config must be a RedisStoreConfig object when is_use_default_online is False")
|
|
887
889
|
|
|
888
|
-
online_feature_config = OnlineFeatureConfiguration()
|
|
890
|
+
online_feature_config = models.OnlineFeatureConfiguration()
|
|
889
891
|
online_feature_config.UserDefault = False
|
|
890
892
|
online_feature_config.Host = online_config.host
|
|
891
893
|
online_feature_config.Port = online_config.port
|
|
892
894
|
online_feature_config.DB = online_config.db
|
|
893
895
|
|
|
894
|
-
offline_feature_config = OfflineFeatureConfiguration()
|
|
896
|
+
offline_feature_config = models.OfflineFeatureConfiguration()
|
|
895
897
|
offline_feature_config.DatabaseName = env_utils.get_database_name(database_name)
|
|
896
898
|
offline_feature_config.TableName = table_name
|
|
897
899
|
|
|
@@ -902,7 +904,7 @@ class FeatureTableClient:
|
|
|
902
904
|
offline_feature_config.DatasourceType = env_utils.get_engine_type()
|
|
903
905
|
offline_feature_config.EngineName = env_utils.get_engine_name()
|
|
904
906
|
|
|
905
|
-
api_requests = CreateOnlineFeatureTableRequest()
|
|
907
|
+
api_requests = models.CreateOnlineFeatureTableRequest()
|
|
906
908
|
api_requests.OfflineFeatureConfiguration = offline_feature_config
|
|
907
909
|
api_requests.OnlineFeatureConfiguration = online_feature_config
|
|
908
910
|
api_requests.TaskSchedulerConfiguration = cycle_obj
|
|
@@ -910,11 +912,11 @@ class FeatureTableClient:
|
|
|
910
912
|
region = env_utils.get_region()
|
|
911
913
|
if not os.environ.get("RESOURCE_GROUP_ID", ""):
|
|
912
914
|
res_group_item = _get_default_resource_group(
|
|
913
|
-
api_requests.ProjectId,
|
|
915
|
+
api_requests.ProjectId, self.__cloud_secret_id, self.__cloud_secret_key, region)
|
|
914
916
|
api_requests.ResourceGroupId = res_group_item.ExecutorGroupId
|
|
915
917
|
else:
|
|
916
918
|
api_requests.ResourceGroupId = os.environ.get("RESOURCE_GROUP_ID")
|
|
917
|
-
client = FeatureCloudSDK(secret_id=
|
|
919
|
+
client = FeatureCloudSDK(secret_id=self.__cloud_secret_id, secret_key=self.__cloud_secret_key, region=region)
|
|
918
920
|
resp = client.CreateOnlineFeatureTable(api_requests)
|
|
919
921
|
if cycle_obj.CycleType == "ONEOFF_CYCLE":
|
|
920
922
|
print(f"publish online task create success. it will be execute after 3 min. {resp.Data.OnlineTableId} {resp.Data.OfflineTableId} ")
|
|
@@ -955,10 +957,23 @@ class FeatureTableClient:
|
|
|
955
957
|
def _check_table_exists(self, full_table_name: str) -> bool:
|
|
956
958
|
return common_utils.check_spark_table_exists(self._spark, full_table_name)
|
|
957
959
|
|
|
960
|
+
def _get_offline_default_database(self) -> Optional[models.FeatureStoreDatabase]:
|
|
961
|
+
client = FeatureCloudSDK(secret_id=self.__cloud_secret_id, secret_key=self.__cloud_secret_key,
|
|
962
|
+
region=self.__region)
|
|
963
|
+
req = models.DescribeFeatureStoreDatabasesRequest()
|
|
964
|
+
req.ProjectId = self.__project
|
|
965
|
+
rsp = client.DescribeFeatureStoreDatabases(req)
|
|
966
|
+
if len(rsp.Data) == 0:
|
|
967
|
+
return None
|
|
968
|
+
for item in rsp.Data:
|
|
969
|
+
if item.OnlineMode == 0 and item.IsDefault == 1:
|
|
970
|
+
return item
|
|
971
|
+
return None
|
|
972
|
+
|
|
958
973
|
|
|
959
974
|
def _get_default_resource_group(project_id: str, secret_id: str, secret_key: str, region: str):
|
|
960
975
|
client = FeatureCloudSDK(secret_id=secret_id, secret_key=secret_key, region=region)
|
|
961
|
-
request = DescribeNormalSchedulerExecutorGroupsRequest()
|
|
976
|
+
request = models.DescribeNormalSchedulerExecutorGroupsRequest()
|
|
962
977
|
request.ProjectId = project_id
|
|
963
978
|
resp = client.DescribeNormalSchedulerExecutorGroups(request)
|
|
964
979
|
# 默认取第一个健康可用的资源组进行执行
|
|
@@ -972,7 +987,7 @@ def _refresh_table(project_id: str, secret_id: str, secret_key: str, region: str
|
|
|
972
987
|
action: str, database_name: str, data_source_name: str, data_source_type: str,
|
|
973
988
|
engine_name: str, is_try: bool):
|
|
974
989
|
client = FeatureCloudSDK(secret_id=secret_id, secret_key=secret_key, region=region)
|
|
975
|
-
request = RefreshFeatureTableRequest()
|
|
990
|
+
request = models.RefreshFeatureTableRequest()
|
|
976
991
|
request.ProjectId = project_id
|
|
977
992
|
request.TableName = table_name
|
|
978
993
|
request.DatabaseName = database_name
|
|
@@ -360,6 +360,7 @@ class TrainingSetClient:
|
|
|
360
360
|
env_manager: Optional[str] = None,
|
|
361
361
|
local_uri: Optional[str] = None,
|
|
362
362
|
params: Optional[dict[str, Any]] = None,
|
|
363
|
+
timestamp_key: str = None,
|
|
363
364
|
**kwargs,
|
|
364
365
|
) -> DataFrame:
|
|
365
366
|
# TODO:ML 待确定是否需要
|
|
@@ -388,16 +389,13 @@ class TrainingSetClient:
|
|
|
388
389
|
"The provided DataFrame for scoring must have unique column names. Found duplicates {}.",
|
|
389
390
|
)
|
|
390
391
|
artifact_path = os.path.join("artifacts", MODEL_DATA_PATH_ROOT)
|
|
391
|
-
# print(f"artifact_path: {artifact_path}")
|
|
392
392
|
with (TempDir() as tmp_location):
|
|
393
393
|
local_path = (
|
|
394
394
|
local_uri
|
|
395
395
|
if local_uri
|
|
396
396
|
else common_utils.download_model_artifacts(model_uri, tmp_location.path())
|
|
397
397
|
)
|
|
398
|
-
# print(f"wedata local_path:{local_path}")
|
|
399
398
|
model_data_path = os.path.join(local_path, artifact_path)
|
|
400
|
-
# print(f"artifact_path: {artifact_path}")
|
|
401
399
|
|
|
402
400
|
# Augment local workspace metastore tables from 2L to 3L,
|
|
403
401
|
# this will prevent us from erroneously reading data from other catalogs
|
|
@@ -425,6 +423,7 @@ class TrainingSetClient:
|
|
|
425
423
|
feature_input_keys = []
|
|
426
424
|
for fci in feature_spec.feature_column_infos:
|
|
427
425
|
feature_input_keys.extend([k for k in fci.lookup_key])
|
|
426
|
+
|
|
428
427
|
on_demand_input_names = uc_utils.get_unique_list_order(
|
|
429
428
|
[
|
|
430
429
|
input_name
|
|
@@ -436,18 +435,13 @@ class TrainingSetClient:
|
|
|
436
435
|
source_data_names = [
|
|
437
436
|
sdci.name for sdci in feature_spec.source_data_column_infos
|
|
438
437
|
]
|
|
439
|
-
# print(f"wedata source_data_names:{source_data_names}")
|
|
440
|
-
|
|
441
|
-
print("===>source_data_names:", source_data_names)
|
|
442
438
|
|
|
443
439
|
feature_output_names = [
|
|
444
440
|
fci.output_name for fci in feature_spec.feature_column_infos
|
|
445
441
|
]
|
|
446
|
-
print("====>feature_output_names:", feature_output_names)
|
|
447
442
|
on_demand_output_names = [
|
|
448
443
|
odci.output_name for odci in feature_spec.on_demand_column_infos
|
|
449
444
|
]
|
|
450
|
-
print("====>on_demand_output_names:", on_demand_output_names)
|
|
451
445
|
all_output_names = set(
|
|
452
446
|
source_data_names + feature_output_names + on_demand_output_names
|
|
453
447
|
)
|
|
@@ -16,13 +16,9 @@ from mlflow.utils import databricks_utils
|
|
|
16
16
|
|
|
17
17
|
from wedata.feature_store.constants import constants
|
|
18
18
|
from wedata.feature_store.constants.constants import MODEL_DATA_PATH_ROOT
|
|
19
|
+
from wedata.feature_store.common.log.logger import get_logger
|
|
19
20
|
from pyspark.sql import SparkSession
|
|
20
21
|
|
|
21
|
-
import logging
|
|
22
|
-
|
|
23
|
-
# 配置日志(可选,根据实际情况配置)
|
|
24
|
-
logging.basicConfig(level=logging.ERROR)
|
|
25
|
-
|
|
26
22
|
|
|
27
23
|
def validate_table_name(name: str):
|
|
28
24
|
"""
|
|
@@ -57,14 +53,16 @@ def build_full_table_name(table_name: str, database_name: Optional[str] = None)
|
|
|
57
53
|
"""
|
|
58
54
|
|
|
59
55
|
feature_store_database_name = os.environ.get("WEDATA_DEFAULT_FEATURE_STORE_DATABASE")
|
|
56
|
+
logger = get_logger()
|
|
60
57
|
if database_name:
|
|
61
58
|
feature_store_database_name = database_name
|
|
62
59
|
|
|
63
60
|
if not feature_store_database_name:
|
|
64
|
-
|
|
61
|
+
logger.error("The current user has not configured a default feature database. "
|
|
62
|
+
"Please contact the manager account to configure it.")
|
|
65
63
|
raise RuntimeError("Feature store is not configured! Please contact the main account to configure it.")
|
|
66
64
|
|
|
67
|
-
|
|
65
|
+
logger.debug("feature database:{}".format(feature_store_database_name))
|
|
68
66
|
|
|
69
67
|
feature_store_database = f"{feature_store_database_name}.{table_name}"
|
|
70
68
|
|
|
@@ -68,10 +68,22 @@ def get_database_name(database_name: str) -> str:
|
|
|
68
68
|
"please check environment configuration")
|
|
69
69
|
|
|
70
70
|
|
|
71
|
+
def set_default_database(database_name: str):
|
|
72
|
+
"""
|
|
73
|
+
设置默认数据库名称
|
|
74
|
+
"""
|
|
75
|
+
if not isinstance(database_name, str):
|
|
76
|
+
raise ValueError("database_name must be a string")
|
|
77
|
+
os.environ["WEDATA_DEFAULT_FEATURE_STORE_DATABASE"] = database_name
|
|
78
|
+
|
|
79
|
+
|
|
71
80
|
def get_engine_name() -> str:
|
|
72
81
|
"""
|
|
73
82
|
获取引擎名称
|
|
74
83
|
"""
|
|
84
|
+
# 因为DLC有特殊,所以先判断DLC,如果没有再判断EMR
|
|
85
|
+
if get_engine_type() == "DLC":
|
|
86
|
+
return _get_variable("KERNEL_ENGINE")
|
|
75
87
|
return _get_variable("KERNEL_ENGINE_NAME")
|
|
76
88
|
|
|
77
89
|
|