tencent-wedata-feature-engineering-dev 0.1.50__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff shows the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as published.

Potentially problematic release: this version of tencent-wedata-feature-engineering-dev might be problematic.

Files changed (38)
  1. {tencent_wedata_feature_engineering_dev-0.1.50.dist-info → tencent_wedata_feature_engineering_dev-0.2.0.dist-info}/METADATA +10 -8
  2. tencent_wedata_feature_engineering_dev-0.2.0.dist-info/RECORD +46 -0
  3. {tencent_wedata_feature_engineering_dev-0.1.50.dist-info → tencent_wedata_feature_engineering_dev-0.2.0.dist-info}/WHEEL +1 -1
  4. wedata/feature_store/client.py +28 -92
  5. wedata/feature_store/constants/constants.py +2 -5
  6. wedata/feature_store/entities/feature_lookup.py +0 -17
  7. wedata/feature_store/entities/feature_spec.py +2 -2
  8. wedata/feature_store/entities/feature_table.py +1 -5
  9. wedata/feature_store/entities/function_info.py +4 -1
  10. wedata/feature_store/feature_table_client/feature_table_client.py +53 -528
  11. wedata/feature_store/spark_client/spark_client.py +15 -41
  12. wedata/feature_store/training_set_client/training_set_client.py +10 -9
  13. wedata/feature_store/utils/common_utils.py +4 -48
  14. wedata/feature_store/utils/feature_lookup_utils.py +43 -37
  15. wedata/feature_store/utils/feature_spec_utils.py +1 -1
  16. wedata/feature_store/utils/uc_utils.py +1 -1
  17. tencent_wedata_feature_engineering_dev-0.1.50.dist-info/RECORD +0 -66
  18. wedata/feature_store/cloud_sdk_client/__init__.py +0 -0
  19. wedata/feature_store/cloud_sdk_client/client.py +0 -108
  20. wedata/feature_store/cloud_sdk_client/models.py +0 -686
  21. wedata/feature_store/cloud_sdk_client/utils.py +0 -39
  22. wedata/feature_store/common/log/__init__.py +0 -0
  23. wedata/feature_store/common/log/logger.py +0 -40
  24. wedata/feature_store/common/store_config/__init__.py +0 -0
  25. wedata/feature_store/common/store_config/redis.py +0 -48
  26. wedata/feature_store/constants/engine_types.py +0 -34
  27. wedata/feature_store/feast_client/__init__.py +0 -0
  28. wedata/feature_store/feast_client/feast_client.py +0 -487
  29. wedata/feature_store/utils/env_utils.py +0 -108
  30. wedata/tempo/__init__.py +0 -0
  31. wedata/tempo/interpol.py +0 -448
  32. wedata/tempo/intervals.py +0 -1331
  33. wedata/tempo/io.py +0 -61
  34. wedata/tempo/ml.py +0 -129
  35. wedata/tempo/resample.py +0 -318
  36. wedata/tempo/tsdf.py +0 -1720
  37. wedata/tempo/utils.py +0 -254
  38. {tencent_wedata_feature_engineering_dev-0.1.50.dist-info → tencent_wedata_feature_engineering_dev-0.2.0.dist-info}/top_level.txt +0 -0
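Most of the churn in this release is removal: 0.2.0 drops the wedata.tempo package, the Feast integration (feast_client), the cloud SDK client, the Redis store config, the logging helper, engine_types, and env_utils. Before upgrading, it can be worth scanning a codebase for imports of the removed modules; a minimal sketch (the module list is taken from the file list above; the scanner itself is illustrative, not part of the package):

    # Illustrative pre-upgrade check: flag imports of modules removed in 0.2.0.
    import pathlib
    import re

    REMOVED_MODULES = (
        "wedata.tempo",
        "wedata.feature_store.feast_client",
        "wedata.feature_store.cloud_sdk_client",
        "wedata.feature_store.common.log",
        "wedata.feature_store.common.store_config",
        "wedata.feature_store.constants.engine_types",
        "wedata.feature_store.utils.env_utils",
    )

    pattern = re.compile(
        r"^\s*(?:from|import)\s+(" + "|".join(re.escape(m) for m in REMOVED_MODULES) + r")\b"
    )

    # Walk the project and report any line that imports a removed module
    for path in pathlib.Path(".").rglob("*.py"):
        for lineno, line in enumerate(path.read_text(errors="ignore").splitlines(), start=1):
            if pattern.match(line):
                print(f"{path}:{lineno}: {line.strip()}")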
wedata/feature_store/common/log/__init__.py (file without changes)

wedata/feature_store/common/log/logger.py
@@ -1,40 +0,0 @@
- import logging
- import sys
-
-
- class LoggerSingleton:
-     _instance = None
-
-     def __new__(cls):
-         if cls._instance is None:
-             cls._instance = super().__new__(cls)
-             cls._instance._initialize_logger()
-         return cls._instance
-
-     def _initialize_logger(self):
-         self.logger = logging.getLogger("wedata-feature-engineering")
-         self.logger.setLevel(logging.INFO)
-
-         # Clear any existing handlers to avoid adding duplicates
-         if self.logger.handlers:
-             self.logger.handlers.clear()
-
-         # Create a formatter that includes the timestamp, file name, and line number
-         formatter = logging.Formatter(
-             fmt='%(asctime)s - %(name)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s',
-             datefmt='%Y-%m-%d %H:%M:%S'
-         )
-
-         # Create a handler that writes to stdout
-         handler = logging.StreamHandler(sys.stdout)
-         handler.setFormatter(formatter)
-
-         self.logger.addHandler(handler)
-
-     def get_logger(self):
-         return self.logger
-
-
- def get_logger():
-     """Return the singleton logger instance."""
-     return LoggerSingleton().get_logger()
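For context, the removed module exposed a module-level get_logger() wrapper around LoggerSingleton; a short usage sketch against the 0.1.50 layout (the log call and the sample output are illustrative):

    # How 0.1.50 code obtained the shared logger (module removed in 0.2.0)
    from wedata.feature_store.common.log.logger import get_logger

    logger = get_logger()  # repeated calls return the same configured instance
    logger.info("feature table created")
    # emits e.g.: 2024-01-01 12:00:00 - wedata-feature-engineering - INFO - example.py:4 - feature table created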
wedata/feature_store/common/store_config/__init__.py (file without changes)

wedata/feature_store/common/store_config/redis.py
@@ -1,48 +0,0 @@
- # -*- coding: utf-8 -*-
-
- __doc__ = """
- Feature Redis store configuration
- """
-
-
- class RedisStoreConfig:
-     def __init__(self, host='localhost', port=6379, db=0, password=None, instance_id=None):
-         self._host = host
-         self._port = port
-         self._db = db
-         self._password = password
-         self._instance_id = instance_id
-
-     @property
-     def host(self):
-         return self._host
-
-     @property
-     def port(self):
-         return self._port
-
-     @property
-     def db(self):
-         return self._db
-
-     @property
-     def password(self):
-         return self._password
-
-     @property
-     def instance_id(self):
-         return self._instance_id
-
-     @property
-     def connection_string(self):
-         if self.password:
-             connection_string = f"{self.host}:{self.port},db={self.db},password={self._password}"
-         else:
-             connection_string = f"{self.host}:{self.port},db={self.db}"
-         return connection_string
-
-     def __repr__(self):
-         return f"RedisStoreConfig(host={self.host}, port={self.port}, db={self.db}, instance_id={self.instance_id})"
-
-     def __str__(self):
-         return self.__repr__()
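Note that connection_string embeds the password only when one is set; a short sketch of the removed class in use (host, port, and password are example values):

    # Sketch against the 0.1.50 API (class removed in 0.2.0)
    from wedata.feature_store.common.store_config.redis import RedisStoreConfig

    cfg = RedisStoreConfig(host="10.0.0.1", port=6380, db=1, password="secret")
    print(cfg.connection_string)           # 10.0.0.1:6380,db=1,password=secret

    cfg_default = RedisStoreConfig()
    print(cfg_default.connection_string)   # localhost:6379,db=0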
wedata/feature_store/constants/engine_types.py
@@ -1,34 +0,0 @@
- from enum import Enum
- import os
-
-
- class EngineTypes(Enum):
-     HIVE_ENGINE = "hive"
-     ICEBERG_ENGINE = "iceberg"
-
-     @classmethod
-     def get_engine(cls, engine_name: str) -> 'EngineTypes':
-         try:
-             return cls(engine_name.lower())
-         except ValueError:
-             raise ValueError(f"Invalid engine type: {engine_name}. Supported engine types: {list(cls)}")
-
-
- class CalculateEngineTypes(Enum):
-     DLC = "dlc"
-     EMR = "emr"
-
-     @classmethod
-     def get_calculate_engine(cls, engine_name: str) -> 'CalculateEngineTypes':
-         try:
-             return cls(engine_name.lower())
-         except ValueError:
-             raise ValueError(f"Invalid engine type: {engine_name}. Supported engine types: {list(cls)}")
-
-
- def judge_engine_type() -> 'CalculateEngineTypes':
-     if os.environ.get("DLC_REGION", ""):
-         return CalculateEngineTypes.DLC
-     else:
-         return CalculateEngineTypes.EMR
-
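Both enums resolve names case-insensitively by lower-casing the input before the value lookup, and judge_engine_type() keys off the DLC_REGION environment variable; a short sketch of the removed helpers (the region value is an example):

    # Sketch against the 0.1.50 API (module removed in 0.2.0)
    import os
    from wedata.feature_store.constants.engine_types import (
        EngineTypes, CalculateEngineTypes, judge_engine_type,
    )

    EngineTypes.get_engine("ICEBERG")                 # EngineTypes.ICEBERG_ENGINE
    CalculateEngineTypes.get_calculate_engine("dlc")  # CalculateEngineTypes.DLC

    os.environ["DLC_REGION"] = "ap-guangzhou"         # example value
    judge_engine_type()                               # CalculateEngineTypes.DLC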
wedata/feature_store/feast_client/__init__.py (file without changes)

wedata/feature_store/feast_client/feast_client.py
@@ -1,487 +0,0 @@
- # -*- coding: utf-8 -*-
-
- __doc__ = """
- Feast client for interacting with the Feast server
- """
-
- import json
- import os
- import re
- from datetime import datetime
- from typing import List, Dict, Optional, Any, Tuple
- try:
-     # pyspark 3.5.0 and later
-     from pyspark.errors import IllegalArgumentException
- except ModuleNotFoundError:
-     from pyspark.sql.utils import IllegalArgumentException
-
- import pandas
- import pytz
- from feast import FeatureStore, RepoConfig, FeatureView
- from pyspark.sql import DataFrame, SparkSession
- from wedata.feature_store.common.store_config.redis import RedisStoreConfig
- from wedata.feature_store.utils import env_utils
- from feast import Entity, FeatureService
- from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import SparkSource
- from feast.infra.online_stores.redis import RedisOnlineStore
- from feast.errors import FeatureServiceNotFoundException
- from feast.types import ValueType
- from pyspark.sql.types import (
-     TimestampType, DateType, StructType, NullType, ByteType, IntegerType, DecimalType, DoubleType, FloatType,
-     BooleanType,
-     StringType, ArrayType, LongType
- )
-
- TEMP_FILE_PATH = "/tmp/feast_data/"
-
-
- class FeastClient:
-
-     def __init__(self, offline_store: SparkSession, online_store_config: RedisStoreConfig = None):
-         project_id = env_utils.get_project_id()
-         remote_path = env_utils.get_feast_remote_url()
-         if offline_store is None or not isinstance(offline_store, SparkSession):
-             raise ValueError("offline_store must be a SparkSession instance")
-
-         # Apply the Spark configuration
-         spark_conf_dict = dict()
-         spark_conf = offline_store.sparkContext.getConf().getAll()
-         for item in spark_conf:
-             spark_conf_dict[item[0]] = item[1]
-
-         config = RepoConfig(
-             project=project_id,
-             registry={"registry_type": "remote", "path": remote_path},
-             provider="local",
-             online_store={"type": "redis",
-                           "connection_string": online_store_config.connection_string} if online_store_config else None,
-             offline_store={"type": "spark", "spark_conf": spark_conf_dict},
-             batch_engine={"type": "spark.engine"},
-             entity_key_serialization_version=2
-         )
-         self._client = FeatureStore(config=config)
-         self._spark = offline_store
-         self._spark.builder.enableHiveSupport()
-         # Set the Spark session timezone to a pytz timezone, to avoid timezone issues when later Spark operations call toPandas
-         try:
-             spark_timezone = self._spark.conf.get("spark.sql.session.timeZone", "")
-             if spark_timezone:
-                 pytz_timezone = _translate_spark_timezone(spark_timezone)
-                 self._spark.conf.set("spark.sql.session.timeZone", pytz_timezone)
-             else:
-                 self._spark.conf.set("spark.sql.session.timeZone", "Etc/GMT+8")
-         except IllegalArgumentException:
-             self._spark.conf.set("spark.sql.session.timeZone", "Etc/GMT+8")
-
-     @property
-     def client(self):
-         return self._client
-
-     def create_table(self,
-                      table_name: str,
-                      primary_keys: List[str],
-                      timestamp_key: str,
-                      df: Optional[DataFrame] = None,
-                      schema: Optional[StructType] = None,
-                      tags: Optional[Dict[str, str]] = None,
-                      description: Optional[str] = None):
-         if schema is not None and df is None:
-             # Create an empty Spark DataFrame
-             df = self._spark.createDataFrame([], schema)
-         feast_table_name = translate_table_name_to_feast(table_name)
-         entities = _get_entity_from_schema(feast_table_name, df.schema, primary_keys)
-         feature_view = _create_table_to_feature_view(
-             table_name=table_name,
-             primary_keys=primary_keys,
-             entities=entities,
-             timestamp_key=timestamp_key,
-             df=df,
-             tags=tags,
-             description=description
-         )
-         # Ensure the time range of data fetched for the feature during incremental serving is correct.
-         self._apply_feature_view(table_name, entities, feature_view)
-
-     def _apply_feature_view(self, table_name, entities, feature_view: FeatureView):
-         database_name, old_table_name = table_name.split(".")
-         try:
-             feature_service = self._client.get_feature_service(database_name)
-         except FeatureServiceNotFoundException:
-             feature_service = FeatureService(name=database_name, features=[feature_view])
-         else:
-             if feature_service.name == "":
-                 feature_service = FeatureService(name=database_name, features=[feature_view])
-             else:
-                 # For an existing FeatureService, update the FeatureView it contains
-                 update_flag = False
-                 for index in range(0, len(feature_service.feature_view_projections)):
-                     if feature_service.feature_view_projections[index].name == feature_view.name:
-                         # update feature_view
-                         feature_service.feature_view_projections[index] = feature_view.projection
-                         update_flag = True
-                         break
-                 if not update_flag:
-                     feature_service.feature_view_projections.append(feature_view.projection)
-         self._client.apply(feature_view)
-         self._client.apply(entities)
-         self._client.apply(feature_service)
-
-     def remove_offline_table(self, table_name: str):
-         feast_table_name = translate_table_name_to_feast(table_name)
-         database_name, old_table_name = table_name.split(".")
-         self._client.registry.delete_data_source(feast_table_name, self._client.project)
-         try:
-             feature_view = self.get_feature_view(table_name)
-         except Exception as e:
-             pass
-         else:
-             try:
-                 feature_service = self._client.get_feature_service(database_name)
-             except Exception as e:
-                 print(f"feature_service:{database_name} not found")
-             else:
-                 for index in range(0, len(feature_service.feature_view_projections)):
-                     if feature_service.feature_view_projections[index].name == feature_view.name:
-                         feature_service.feature_view_projections.pop(index)
-                         break
-                 self._client.apply(feature_service)
-         self._client.registry.delete_feature_view(feast_table_name, self._client.project)
-
-     def get_feature_view(self, table_name: str):
-         feast_table_name = translate_table_name_to_feast(table_name)
-         return self._client.get_feature_view(feast_table_name)
-
-     def remove_online_table(self, table_name: str):
-         if not self._client.config.online_store:
-             raise ValueError("Online store is not configured")
-
-         feast_table_name = translate_table_name_to_feast(table_name)
-         table_view = self._client.get_feature_view(feast_table_name)
-         if not table_view:
-             raise ValueError(f"Table {table_name} not found in Feast")
-
-         if self._client.config.online_store.type == "redis":
-             redis_online_store = RedisOnlineStore()
-             redis_online_store.delete_table(self._client.config, table_view)
-             table_view.online = False
-             table_view.update_materialization_intervals(get_materialization_default_time())
-             self._client.apply(table_view)
-         else:
-             raise ValueError(f"Unsupported online store type: {self._client.config.online_store.type}")
-
-         self._client.refresh_registry()
-
-     def alter_table(self, full_table_name: str, timestamp_key: str, primary_keys: List[str]):
-         """
-         Sync a registered Delta table into Feast as offline feature data
-
-         Args:
-             full_table_name: table name (format: <table>)
-             timestamp_key: timestamp column name
-             primary_keys: list of primary key column names
-         Raises:
-             ValueError: raised when the table does not exist or the arguments are invalid
-             RuntimeError: raised when the sync operation fails
-         """
-         import logging
-         try:
-
-             # 1. Read the Delta table data and schema
-             df = self._spark.table(full_table_name)
-
-             feast_table_name = translate_table_name_to_feast(full_table_name)
-             entities = _get_entity_from_schema(feast_table_name, df.schema, primary_keys)
-             # 2. Get the primary keys and timestamp column from the table properties
-             tbl_props = self._spark.sql(f"SHOW TBLPROPERTIES {full_table_name}").collect()
-             props = {row['key']: row['value'] for row in tbl_props}
-
-             if not primary_keys:
-                 raise ValueError("Primary keys not found in table properties")
-             if not timestamp_key:
-                 raise ValueError("Timestamp keys not found in table properties")
-
-             logging.info(f"Primary keys: {primary_keys}")
-             logging.info(f"Timestamp keys: {timestamp_key}")
-
-             # 3. Create or update the FeatureView
-             feature_view = _create_table_to_feature_view(
-                 table_name=full_table_name,
-                 entities=entities,
-                 primary_keys=primary_keys,
-                 timestamp_key=timestamp_key,
-                 df=df,
-                 tags={"source": "delta_table", **json.loads(props.get("tags", "{}"))},
-             )
-
-             self._apply_feature_view(full_table_name, entities, feature_view)
-             # 4. Apply to Feast
-             logging.info(f"Successfully synced Delta table {full_table_name} to Feast")
-
-         except Exception as e:
-             logging.error(f"Failed to sync Delta table to Feast: {str(e)}")
-             raise RuntimeError(f"Failed to sync Delta table {full_table_name} to Feast: {str(e)}") from e
-
-     def modify_tags(
-             self,
-             table_name: str,
-             tags: Dict[str, str]
-     ) -> None:
-         """Modify a feature table's tags
-
-         Args:
-             table_name: feature table name (format: <database>.<table>)
-             tags: dictionary of tags to update
-
-         Raises:
-             ValueError: raised when the arguments are invalid
-             RuntimeError: raised when the update fails
-         """
-         if not table_name:
-             raise ValueError("table_name cannot be empty")
-         if not tags:
-             raise ValueError("tags cannot be empty")
-
-         feast_table_name = translate_table_name_to_feast(table_name)
-         try:
-             # Fetch the existing FeatureView
-             feature_view = self._client.get_feature_view(feast_table_name)
-             if not feature_view:
-                 raise ValueError(f"FeatureView '{table_name}' not found")
-
-             # Update the tags
-             current_tags = feature_view.tags or {}
-             current_tags.update(tags)
-             feature_view.tags = current_tags
-
-             # Apply the update
-             self._client.apply([feature_view])
-             print(f"Successfully updated tags for table '{table_name}'")
-
-         except Exception as e:
-             raise RuntimeError(f"Failed to modify tags for table '{table_name}': {str(e)}") from e
-
-     def get_online_table_view(self, full_table_name: str, columns_name: List[str], entity_rows: List[Dict[str, Any]]) -> pandas.DataFrame:
-         """
-         Fetch data from an online feature table
-         args:
-             full_table_name: feature table name (format: <database>.<table>)
-         return:
-             pandas.DataFrame of online feature values
-         """
-         feast_table = translate_table_name_to_feast(full_table_name)
-         feature_names = []
-         for column_name in columns_name:
-             feature_names.append(f"{feast_table}:{column_name}")
-
-         if isinstance(entity_rows, list):
-             new_entity_rows = []
-             for entity_row in entity_rows:
-                 temp_entity_row = {}
-                 for key, value in entity_row.items():
-                     temp_entity_row[_get_entity_name(full_table_name, key)] = value
-                 new_entity_rows.append(temp_entity_row)
-         elif isinstance(entity_rows, dict):
-             new_entity_rows = {}
-             for key, value in entity_rows.items():
-                 new_entity_rows[_get_entity_name(full_table_name, key)] = value
-         else:
-             raise TypeError("entity_rows must be a list or dict")
-
-         try:
-             self._client.refresh_registry()
-             online_stores = self._client.get_online_features(features=feature_names, entity_rows=new_entity_rows)
-         except UnboundLocalError as e:
-             raise ValueError(f"{full_table_name} table not in feast registry. {str(e)}")
-
-         return online_stores.to_df()
-
-     def read_offline_table(self, table_name: str, database_name: str, columns_df: pandas.DataFrame,
-                            full_feature_names=True) -> pandas.DataFrame:
-         """
-         Fetch data from an offline feature table (data stored in Feast)
-         """
-         if not isinstance(columns_df, pandas.DataFrame):
-             raise TypeError("columns_df must be a pandas.DataFrame instance")
-
-         full_table_name = f"{database_name}.{table_name}"
-         feast_table_name = translate_table_name_to_feast(full_table_name)
-         # Rename the DataFrame columns in bulk
-         rename_dict = {}
-         for column_name in columns_df.columns:
-             rename_dict[column_name] = _get_entity_name(feast_table_name, column_name)
-
-         columns_df.rename(columns=rename_dict, inplace=True)
-         features = self._client.get_feature_service(database_name, allow_cache=False)
-         result = self._client.get_historical_features(
-             entity_df=columns_df, features=features, full_feature_names=full_feature_names)
-         return result.to_df()
-
-
- def _create_table_to_feature_view(
-         table_name: str,
-         entities: List[Entity],
-         primary_keys: List[str],
-         timestamp_key: str,
-         df: Optional[DataFrame],
-         tags: Optional[Dict[str, str]] = None,
-         description: Optional[str] = None,
- ):
-     """
-
-     Returns:
-         FeatureView instance
-     """
-     if primary_keys is None or len(primary_keys) == 0:
-         raise ValueError("primary_keys must not be empty")
-     if not timestamp_key:
-         raise ValueError("timestamp_keys must not be empty")
-
-     os.makedirs(TEMP_FILE_PATH, exist_ok=True)
-
-     temp_file = os.path.join(TEMP_FILE_PATH, f"{table_name}.parquet")
-
-     df.write.parquet(f"file://{temp_file}", mode="overwrite")
-     feast_table_name = translate_table_name_to_feast(table_name)
-     resources = SparkSource(
-         name=feast_table_name,
-         table=table_name,
-         # path=f"file://{temp_file}",
-         timestamp_field=timestamp_key,
-         # query=f"SELECT * FROM {table_name}",
-         # file_format="parquet",
-         tags=tags,
-         description=description,
-     )
-
-     # Remaining logic for building the FeatureView
-     feature_view = FeatureView(
-         name=feast_table_name,
-         entities=entities,
-         tags=tags,
-         source=resources,
-     )
-     feature_view.online = False
-     feature_view.update_materialization_intervals([(datetime(1990, 1, 1), datetime(1990, 1, 1))])
-     return feature_view
-
-
- def _translate_spark_timezone(timezone: str) -> str:
-     """
-     Convert a Spark timezone string into a pytz timezone string
-     Args:
-         timezone: Spark timezone string
-     Returns:
-         pytz timezone string
-     """
-     try:
-         py_timezone = pytz.timezone(timezone)
-     except pytz.exceptions.UnknownTimeZoneError:
-         # Convert e.g. GMT+08:00 to 'Etc/GMT+8'
-         result = re.compile(r"GMT([+-])(\d{2}):(\d{2})").match(timezone)
-         if result:
-             groups = result.groups()
-             if len(groups) == 3:
-                 return f"Etc/GMT{groups[0]}{int(groups[1])}"
-         else:
-             raise ValueError(f"Invalid timezone string: {timezone}")
-     else:
-         return str(py_timezone)
-
-     return timezone
-
-
- def _get_entity_name(table_name: str, field_name: str):
-     return field_name
-     # return f"{table_name}_{field_name}"
-
-
- def _get_entity_from_schema(table_name: str, schema: StructType, primary_list: List[str] = None) -> List[Entity]:
-     """
-     Args:
-         table_name: table name
-         schema: Spark DataFrame schema
-         primary_list: list of primary key columns
-     Returns:
-         List[Entity]
-     """
-     entities = list()
-     for field in schema.fields:
-         if primary_list:
-             if field.name not in primary_list:
-                 continue
-
-         entity_name = _get_entity_name(table_name, field.name)
-         if isinstance(field.dataType, (TimestampType, DateType)):
-             continue
-             # entities.append(Entity(name=entity_name, value_type=ValueType.UNIX_TIMESTAMP))
-         elif isinstance(field.dataType, IntegerType):
-             entities.append(Entity(name=entity_name, value_type=ValueType.INT32))
-         elif isinstance(field.dataType, StringType):
-             entities.append(Entity(name=entity_name, value_type=ValueType.STRING))
-         elif isinstance(field.dataType, (DecimalType, FloatType)):
-             entities.append(Entity(name=entity_name, value_type=ValueType.FLOAT))
-         elif isinstance(field.dataType, DoubleType):
-             entities.append(Entity(name=entity_name, value_type=ValueType.DOUBLE))
-         elif isinstance(field.dataType, BooleanType):
-             entities.append(Entity(name=entity_name, value_type=ValueType.BOOL))
-         elif isinstance(field.dataType, ByteType):
-             entities.append(Entity(name=entity_name, value_type=ValueType.BYTES))
-         elif isinstance(field.dataType, LongType):
-             entities.append(Entity(name=entity_name, value_type=ValueType.INT64))
-         elif isinstance(field.dataType, NullType):
-             entities.append(Entity(name=entity_name, value_type=ValueType.NULL))
-         elif isinstance(field.dataType, ArrayType):
-             if isinstance(field.dataType.elementType, ByteType):
-                 entities.append(Entity(name=entity_name, value_type=ValueType.BYTES_LIST))
-             elif isinstance(field.dataType.elementType, StringType):
-                 entities.append(Entity(name=entity_name, value_type=ValueType.STRING_LIST))
-             elif isinstance(field.dataType.elementType, IntegerType):
-                 entities.append(Entity(name=entity_name, value_type=ValueType.INT32_LIST))
-             elif isinstance(field.dataType.elementType, LongType):
-                 entities.append(Entity(name=entity_name, value_type=ValueType.INT64_LIST))
-             elif isinstance(field.dataType.elementType, DoubleType):
-                 entities.append(Entity(name=entity_name, value_type=ValueType.DOUBLE_LIST))
-             elif isinstance(field.dataType.elementType, (DecimalType, FloatType)):
-                 entities.append(Entity(name=entity_name, value_type=ValueType.FLOAT_LIST))
-             elif isinstance(field.dataType.elementType, BooleanType):
-                 entities.append(Entity(name=entity_name, value_type=ValueType.BOOL_LIST))
-             elif isinstance(field.dataType.elementType, (TimestampType, DateType)):
-                 continue
-                 # entities.append(Entity(name=entity_name, value_type=ValueType.UNIX_TIMESTAMP_LIST))
-             else:
-                 print(f"Unsupported array element type: {field.dataType.elementType}")
-         else:
-             print(f"Unsupported field type: {field.dataType}")
-
-     return entities
-
-
- def translate_table_name_to_feast(table_name: str):
-     splits = table_name.split(".")
-     if len(splits) == 1:
-         return table_name
-     elif len(splits) == 2:
-         return f"{splits[0]}_{splits[1]}"
-     else:
-         raise ValueError(f"Invalid table name: {table_name}")
-
-
- def get_materialization_default_time() -> List[Tuple[datetime, datetime]]:
-     return [(datetime(1990, 1, 1), datetime(1990, 1, 1))]
-
-
- # if __name__ == '__main__':
- #     import datetime
- #     FeastClient = FeastClient()
- #     FeastClient.client.registry.delete_data_source(name="xxxxx")
- #     FeastClient.client.registry.delete_entity("xxxxx", )
- #     FeastClient.client.registry.delete_feature_view()
- #     FeastClient.client.registry.get_feature_view()
- #     FeastClient.client.registry.delete_feature_service()
- #     FeastClient.client.get_historical_features()
- #     feature_view = FeastClient.client.get_feature_view(name="xxxxx")
- #     feature_view.source.get_table_query_string()
- #     feast_table_name = "xxx"
- #     from wedata.feature_store.utils.common_utils import build_full_table_name
- #     feast_table_name = translate_table_name_to_feast(build_full_table_name(table_name, database_name))
- #     FeastClient.client.materialize(start_date=datetime.datetime(2021,1,1), end_date=datetime.datetime.now(), feature_views=[feast_table_name])
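Putting the removed client together: FeastClient wraps a Feast FeatureStore whose registry URL and project id come from env_utils, table names are flattened from <database>.<table> to <database>_<table> for Feast, and create_table registers a SparkSource-backed FeatureView plus a per-database FeatureService. A hedged end-to-end sketch against the 0.1.50 API (table, host, and credential values are examples; env_utils must be able to resolve the project id and registry URL in the target environment):

    # Sketch against the 0.1.50 API (package removed in 0.2.0); names are examples.
    from pyspark.sql import SparkSession
    from wedata.feature_store.common.store_config.redis import RedisStoreConfig
    from wedata.feature_store.feast_client.feast_client import (
        FeastClient, translate_table_name_to_feast,
    )

    spark = SparkSession.builder.enableHiveSupport().getOrCreate()
    client = FeastClient(
        offline_store=spark,
        online_store_config=RedisStoreConfig(host="10.0.0.1", password="secret"),
    )

    # Register an existing table; "mydb" also becomes the FeatureService name
    client.create_table(
        table_name="mydb.user_features",
        primary_keys=["user_id"],
        timestamp_key="event_ts",
        df=spark.table("mydb.user_features"),
    )

    translate_table_name_to_feast("mydb.user_features")  # 'mydb_user_features'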