tencent-wedata-feature-engineering-dev 0.1.48__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tencent_wedata_feature_engineering_dev-0.1.48.dist-info → tencent_wedata_feature_engineering_dev-0.2.5.dist-info}/METADATA +14 -3
- tencent_wedata_feature_engineering_dev-0.2.5.dist-info/RECORD +78 -0
- {tencent_wedata_feature_engineering_dev-0.1.48.dist-info → tencent_wedata_feature_engineering_dev-0.2.5.dist-info}/WHEEL +1 -1
- wedata/__init__.py +1 -1
- wedata/common/base_table_client/__init__.py +1 -0
- wedata/common/base_table_client/base.py +58 -0
- wedata/common/cloud_sdk_client/__init__.py +2 -0
- wedata/{feature_store → common}/cloud_sdk_client/client.py +33 -3
- wedata/{feature_store → common}/cloud_sdk_client/models.py +212 -37
- wedata/{feature_store → common}/cloud_sdk_client/utils.py +7 -0
- wedata/{feature_store → common}/constants/constants.py +3 -2
- wedata/common/constants/engine_types.py +34 -0
- wedata/{feature_store → common}/entities/column_info.py +6 -5
- wedata/{feature_store → common}/entities/feature_column_info.py +2 -1
- wedata/{feature_store → common}/entities/feature_lookup.py +1 -1
- wedata/{feature_store → common}/entities/feature_spec.py +9 -9
- wedata/{feature_store → common}/entities/feature_table_info.py +1 -1
- wedata/{feature_store → common}/entities/function_info.py +2 -1
- wedata/{feature_store → common}/entities/on_demand_column_info.py +2 -1
- wedata/{feature_store → common}/entities/source_data_column_info.py +3 -1
- wedata/{feature_store → common}/entities/training_set.py +6 -6
- wedata/common/feast_client/__init__.py +1 -0
- wedata/{feature_store → common}/feast_client/feast_client.py +1 -1
- wedata/common/log/__init__.py +1 -0
- wedata/{feature_store/common → common}/log/logger.py +9 -5
- wedata/common/spark_client/__init__.py +1 -0
- wedata/{feature_store → common}/spark_client/spark_client.py +6 -7
- wedata/{feature_store → common}/utils/common_utils.py +7 -9
- wedata/{feature_store → common}/utils/env_utils.py +12 -0
- wedata/{feature_store → common}/utils/feature_lookup_utils.py +6 -6
- wedata/{feature_store → common}/utils/feature_spec_utils.py +13 -8
- wedata/{feature_store → common}/utils/feature_utils.py +5 -5
- wedata/{feature_store → common}/utils/on_demand_utils.py +5 -4
- wedata/{feature_store → common}/utils/schema_utils.py +1 -1
- wedata/{feature_store → common}/utils/signature_utils.py +4 -4
- wedata/{feature_store → common}/utils/training_set_utils.py +13 -13
- wedata/{feature_store → common}/utils/uc_utils.py +1 -1
- wedata/feature_engineering/__init__.py +1 -0
- wedata/feature_engineering/client.py +417 -0
- wedata/feature_engineering/ml_training_client/ml_training_client.py +569 -0
- wedata/feature_engineering/mlflow_model.py +9 -0
- wedata/feature_engineering/table_client/table_client.py +548 -0
- wedata/feature_store/client.py +11 -15
- wedata/feature_store/constants/engine_types.py +8 -30
- wedata/feature_store/feature_table_client/feature_table_client.py +73 -105
- wedata/feature_store/training_set_client/training_set_client.py +12 -23
- wedata/tempo/interpol.py +2 -2
- tencent_wedata_feature_engineering_dev-0.1.48.dist-info/RECORD +0 -66
- {tencent_wedata_feature_engineering_dev-0.1.48.dist-info → tencent_wedata_feature_engineering_dev-0.2.5.dist-info}/top_level.txt +0 -0
- /wedata/{feature_store/cloud_sdk_client → common}/__init__.py +0 -0
- /wedata/{feature_store/common/log → common/constants}/__init__.py +0 -0
- /wedata/{feature_store/common/protos → common/entities}/__init__.py +0 -0
- /wedata/{feature_store → common}/entities/environment_variables.py +0 -0
- /wedata/{feature_store → common}/entities/feature.py +0 -0
- /wedata/{feature_store → common}/entities/feature_function.py +0 -0
- /wedata/{feature_store → common}/entities/feature_spec_constants.py +0 -0
- /wedata/{feature_store → common}/entities/feature_table.py +0 -0
- /wedata/{feature_store/entities → common/protos}/__init__.py +0 -0
- /wedata/{feature_store/common → common}/protos/feature_store_pb2.py +0 -0
- /wedata/{feature_store/feast_client → common/utils}/__init__.py +0 -0
- /wedata/{feature_store → common}/utils/topological_sort.py +0 -0
- /wedata/{feature_store → common}/utils/validation_utils.py +0 -0
- /wedata/{feature_store/spark_client → feature_engineering/ml_training_client}/__init__.py +0 -0
- /wedata/{feature_store/utils → feature_engineering/table_client}/__init__.py +0 -0
wedata/feature_store/feature_table_client/feature_table_client.py
CHANGED

@@ -2,7 +2,7 @@
 特征表操作相关工具方法
 """
 import json
-from typing import Union, List, Dict, Optional,
+from typing import Union, List, Dict, Optional, Any
 
 import tencentcloud.common.exception
 from pyspark.sql import DataFrame, SparkSession
@@ -10,23 +10,23 @@ from pyspark.sql.streaming import StreamingQuery
 from pyspark.sql.types import StructType
 import os
 import datetime
-from wedata.
+from wedata.common.constants.constants import (
     APPEND, DEFAULT_WRITE_STREAM_TRIGGER, FEATURE_TABLE_KEY,
     FEATURE_TABLE_VALUE, FEATURE_TABLE_PROJECT, FEATURE_TABLE_TIMESTAMP,
     FEATURE_TABLE_BACKUP_PRIMARY_KEY, FEATURE_DLC_TABLE_PRIMARY_KEY)
-from wedata.
+from wedata.common.constants.engine_types import EngineTypes
+from wedata.common.log import get_logger
 from wedata.feature_store.common.store_config.redis import RedisStoreConfig
-from wedata.
-from wedata.
-from wedata.
-from wedata.
-from wedata.
-
-
-from wedata.feature_store.cloud_sdk_client.client import FeatureCloudSDK
+from wedata.common.entities.feature_table import FeatureTable
+from wedata.common.spark_client import SparkClient
+from wedata.common.utils import common_utils, env_utils
+from wedata.common.feast_client.feast_client import FeastClient
+from wedata.common.cloud_sdk_client import models
+from wedata.common.cloud_sdk_client import FeatureCloudSDK
+from wedata.common.base_table_client import AbstractBaseTableClient
 
 
-class FeatureTableClient:
+class FeatureTableClient(AbstractBaseTableClient):
     """特征表操作类"""
 
     def __init__(
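Most edits in this release follow the pattern visible in the hunk above: modules that previously lived under `wedata.feature_store.*` now live under `wedata.common.*`, and the cloud SDK request/response classes are reached through the `models` module instead of being imported individually. A minimal usage sketch of the new layout, using only names that appear in this diff (the placeholder project id is made up, and the sketch assumes the 0.2.5 wheel is installed):

    # New layout in 0.2.5: shared code lives under wedata.common, and cloud SDK
    # request classes are reached through the `models` module.
    from wedata.common.cloud_sdk_client import FeatureCloudSDK, models
    from wedata.common.utils import common_utils, env_utils  # previously wedata.feature_store.utils.*

    request = models.RefreshFeatureTableRequest()   # request type shown later in this diff
    request.ProjectId = "my-project-id"             # placeholder value for illustration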
@@ -44,6 +44,11 @@ class FeatureTableClient:
         self.__cloud_secret_id, self.__cloud_secret_key = env_utils.get_cloud_secret()
         self.__project = env_utils.get_project_id()
         self.__region = env_utils.get_region()
+        self.__logger = get_logger()
+        default_online_table = self._get_offline_default_database()
+        if default_online_table:
+            env_utils.set_default_database(default_online_table.DatabaseName)
+
 
     @property
     def cloud_secret_id(self) -> str:
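The constructor now obtains a shared logger from the new `wedata.common.log` package (the old `feature_store/common/log/logger.py` moves there, +9 -5, and the package gains a one-line `__init__.py`). The moved module's body is not shown in this diff, so the following is only a minimal sketch of the kind of `get_logger` helper it appears to expose; the real logger name, format, and level may differ:

    # Hypothetical sketch of wedata/common/log: only get_logger() and its use are
    # confirmed by this diff; everything else here is an assumption.
    import logging

    _LOGGER_NAME = "wedata"   # assumed logger name
    _configured = False

    def get_logger() -> logging.Logger:
        """Return a package-wide logger, configuring it once on first use."""
        global _configured
        logger = logging.getLogger(_LOGGER_NAME)
        if not _configured:
            handler = logging.StreamHandler()
            handler.setFormatter(logging.Formatter(
                "%(asctime)s %(name)s %(levelname)s %(message)s"))
            logger.addHandler(handler)
            logger.setLevel(logging.INFO)
            _configured = True
        return logger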
@@ -77,56 +82,6 @@ class FeatureTableClient:
     def region(self) -> str:
         return self.__region
 
-    @staticmethod
-    def _normalize_params(
-            param: Optional[Union[str, Sequence[str]]],
-            default_type: type = list
-    ) -> list:
-        """统一处理参数标准化"""
-        if param is None:
-            return default_type()
-        return list(param) if isinstance(param, Sequence) else [param]
-
-    @staticmethod
-    def _validate_schema(df: DataFrame, schema: StructType):
-        """校验DataFrame和schema的有效性和一致性"""
-        # 检查是否同时为空
-        if df is None and schema is None:
-            raise ValueError("Either DataFrame or schema must be provided")
-
-        # 检查schema匹配
-        if df is not None and schema is not None:
-            df_schema = df.schema
-            if df_schema != schema:
-                diff_fields = set(df_schema.fieldNames()).symmetric_difference(set(schema.fieldNames()))
-                raise ValueError(
-                    f"DataFrame schema does not match. Differences: "
-                    f"{diff_fields if diff_fields else 'field type mismatch'}"
-                )
-
-    @staticmethod
-    def _validate_key_conflicts(primary_keys: List[str], timestamp_keys: str):
-        """校验主键与时间戳键是否冲突"""
-        if timestamp_keys in primary_keys:
-            raise ValueError(f"Timestamp keys conflict with primary keys: {timestamp_keys}")
-
-    @staticmethod
-    def _validate_key_exists(primary_keys: List[str], timestamp_keys: str):
-        """校验主键与时间戳键是否存在"""
-        if not primary_keys:
-            raise ValueError("Primary keys cannot be empty")
-        if not timestamp_keys:
-            raise ValueError("Timestamp keys cannot be empty")
-
-    @staticmethod
-    def _escape_sql_value(value: str) -> str:
-        """转义SQL值中的特殊字符"""
-        return value.replace("'", "''")
-
-    @staticmethod
-    def _check_sequence_element_type(sequence: Sequence[Any], element_type: type) -> bool:
-        """检查序列中的元素是否为指定类型"""
-        return all(isinstance(element, element_type) for element in sequence)
 
     def create_table(
             self,
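These private helpers disappear from `FeatureTableClient` at the same time the class starts inheriting from `AbstractBaseTableClient` and a new `wedata/common/base_table_client/base.py` (+58 lines) appears, so they have presumably moved into the shared base rather than been dropped. The base module's contents are not included in this diff; the sketch below only illustrates that assumption, reusing two of the removed helpers:

    # Assumed shape of wedata/common/base_table_client/base.py (not shown in this diff).
    from typing import List


    class AbstractBaseTableClient:
        """Shared validation/escaping helpers for table clients (assumption)."""

        @staticmethod
        def _validate_key_conflicts(primary_keys: List[str], timestamp_keys: str):
            # Reused from the removed helper: timestamp key must not also be a primary key.
            if timestamp_keys in primary_keys:
                raise ValueError(f"Timestamp keys conflict with primary keys: {timestamp_keys}")

        @staticmethod
        def _escape_sql_value(value: str) -> str:
            # Reused from the removed helper: escape single quotes for SQL literals.
            return value.replace("'", "''")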
@@ -310,7 +265,7 @@ class FeatureTableClient:
             raise ValueError(f"Engine type {engine_type} is not supported")
 
         # 打印sql
-
+        self.__logger.info(f"create table ddl: {ddl}\n")
 
         # 执行DDL
         try:
@@ -320,8 +275,6 @@ class FeatureTableClient:
         except Exception as e:
             raise ValueError(f"Failed to create table: {str(e)}") from e
 
-        print("async table info to feast")
-
         self._feast_client.create_table(
             table_name=table_name,
             primary_keys=primary_keys,
@@ -332,7 +285,7 @@ class FeatureTableClient:
             description=description
         )
 
-
+        self.__logger.info(f"Table '{name}' created successfully. Starting web synchronization.")
 
         try:
             self._sync_table_info(table_name=name, action_name="create",
@@ -340,7 +293,8 @@ class FeatureTableClient:
                                   data_source_name=data_source_name, engine_name=env_utils.get_engine_name(),
                                   is_try=False)
         except tencentcloud.common.exception.TencentCloudSDKException as e:
-            raise RuntimeError(f"
+            raise RuntimeError(f"Failed to synchronize web data for table '{name}'. "
+                               f"Please manually operate on the web page. Error: {str(e)}")
 
         # 构建并返回FeatureTable对象
         return FeatureTable(
@@ -527,27 +481,28 @@ class FeatureTableClient:
 
         try:
             self._sync_table_info(table_name=name, action_name="create",
-
-
+                                  database_name=env_utils.get_database_name(database_name),
+                                  data_source_name=data_source_name, engine_name=env_utils.get_engine_name(), is_try=True)
         except tencentcloud.common.exception.TencentCloudSDKException as e:
             raise RuntimeError(f"Table '{name}' is can't create. {str(e)}")
 
         # 执行修改
-
+        self.__logger.info(f"alter table sql: \n {alter_sql}")
         self._spark.sql(alter_sql)
-
+        self.__logger.debug("Execute sql done, start sync table info to feast")
         self._feast_client.alter_table(full_table_name=table_name, primary_keys=primary_keys,
                                        timestamp_key=timestamp_key)
-
+        self.__logger.info(f"Successfully register table '{table_name}'. Starting web synchronization.")
 
         try:
             self._sync_table_info(table_name=name, action_name="create",
-
-
+                                  database_name=env_utils.get_database_name(database_name),
+                                  data_source_name=data_source_name, engine_name=env_utils.get_engine_name(), is_try=False)
         except tencentcloud.common.exception.TencentCloudSDKException as e:
-            raise RuntimeError(f"
-
-
+            raise RuntimeError(f"Failed to synchronize web data for table '{name}'. "
+                               f"Please manually operate on the web page. Error: {str(e)}")
+        except (ValueError, RuntimeError):
+            raise
         except Exception as e:
             raise RuntimeError(f"Failed to modify properties for table '{table_name}': {str(e)}") from e
 
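One behavioral detail in the hunk above: the new `except (ValueError, RuntimeError): raise` clause sits before the catch-all `except Exception`, so errors that were already raised with a specific message are re-raised untouched instead of being wrapped a second time in the generic "Failed to modify properties" RuntimeError. A stripped-down illustration of that ordering (names here are illustrative, not from the package):

    def _wrap_unexpected(step):
        try:
            step()
        except (ValueError, RuntimeError):
            raise                                    # specific errors pass through unchanged
        except Exception as e:                       # only unexpected failures get wrapped
            raise RuntimeError(f"Failed to modify properties: {e}") from e

    # _wrap_unexpected(lambda: int("x"))   # ValueError propagates as-is
    # _wrap_unexpected(lambda: 1 / 0)      # ZeroDivisionError arrives wrapped in RuntimeError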
@@ -624,33 +579,34 @@ class FeatureTableClient:
         try:
             # 检查表是否存在
             if not self._check_table_exists(table_name):
-
+                self.__logger.error(f"Table '{name}' does not exist")
                 return
 
             try:
                 feature_view = self._feast_client.get_feature_view(table_name)
             except Exception as e:
-
+                pass
+                # self.__logger.warning(f"Table '{name}' is not a feature table, skip delete. {str(e)}")
             else:
                 if feature_view.online:
                     raise ValueError(f"Table '{name}' has a online table, please call drop_online_table first")
             try:
                 self._sync_table_info(table_name=name, action_name="delete",
-
-
+                                      database_name=env_utils.get_database_name(database_name),
+                                      data_source_name="", engine_name=env_utils.get_engine_name(), is_try=True)
             except tencentcloud.common.exception.TencentCloudSDKException as e:
                 raise RuntimeError(f"Table '{name}' is can't delete. {str(e)}")
 
             # 执行删除
             self._spark.sql(f"DROP TABLE {table_name}")
-
+            self.__logger.info(f"Table '{name}' dropped")
             try:
                 self._feast_client.remove_offline_table(table_name=table_name)
             except Exception as e:
                 raise
                 # raise ValueError(f"Failed to delete table '{name}' in feast: {str(e)}")
             else:
-
+                self.__logger.info(f"Table '{name}' removed from feast")
 
             try:
                 self._sync_table_info(table_name=name, action_name="delete",
@@ -706,7 +662,7 @@ class FeatureTableClient:
         schema_name_list = [field.name for field in tmp_schema_list]
         schema = StructType(tmp_schema_list)
         for field in schema:
-
+            self.__logger.debug(f"translate {field.name} to feast Type: {field.dataType}")
 
         feast_client = FeastClient(offline_store=self._spark, online_store_config=online_config)
         # 构建离线表的entity的数据过滤
@@ -721,7 +677,7 @@ class FeatureTableClient:
                 full_table_name=full_table_name,
                 columns_name=columns_name_list,
                 entity_rows=[result_row.asDict()])
-
+            self.__logger.debug(f"=====>read online dataframe:\n{online_view[schema_name_list]}")
             return self._spark.createDataFrame(online_view[schema_name_list], schema=schema, verifySchema=False)
         else:
             return self._spark.createDataFrame([])
@@ -730,7 +686,7 @@ class FeatureTableClient:
             full_table_name=full_table_name,
             columns_name=columns_name_list,
             entity_rows=entity_row)
-
+        self.__logger.debug(f"=====>read online dataframe:\n{online_view[schema_name_list]}")
         return self._spark.createDataFrame(online_view[schema_name_list], schema=schema, verifySchema=False)
 
     def get_table(
@@ -819,7 +775,9 @@ class FeatureTableClient:
 
             # 执行修改
             self._spark.sql(alter_sql)
-            self.
+            tbl_pro = self._spark.sql(f"SHOW TBLPROPERTIES {table_name}")
+            props = {row['key']: row['value'] for row in tbl_pro.collect()}
+            self._feast_client.modify_tags(table_name=table_name, tags=props)
             print(f"Successfully updated properties for table '{name}': {list(properties.keys())}")
 
         except ValueError as e:
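The property update now re-reads the table's effective properties and forwards them to Feast as tags. `SHOW TBLPROPERTIES` in Spark SQL returns one row per property with `key` and `value` columns, which is why the dict comprehension above works. A small self-contained illustration (table name and property keys are made up):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    spark.sql("CREATE TABLE demo_tbl (id INT) USING parquet "
              "TBLPROPERTIES ('team'='data-eng', 'quality'='silver')")

    rows = spark.sql("SHOW TBLPROPERTIES demo_tbl").collect()
    props = {row['key']: row['value'] for row in rows}
    # props contains at least {'team': 'data-eng', 'quality': 'silver'},
    # plus any Spark-managed entries.
    print(props)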
@@ -827,9 +785,9 @@ class FeatureTableClient:
         except Exception as e:
             raise RuntimeError(f"Failed to modify properties for table '{name}': {str(e)}") from e
 
-    def publish_table(self, table_name: str, data_source_name: str,
+    def publish_table(self, table_name: str, data_source_name: str,
                       database_name: Optional[str] = None,
-                      is_cycle: bool = False, cycle_obj: TaskSchedulerConfiguration = None,
+                      is_cycle: bool = False, cycle_obj: models.TaskSchedulerConfiguration = None,
                       is_use_default_online: bool = True, online_config: RedisStoreConfig = None):
         """
         将离线特征表发布为在线特征表
@@ -852,46 +810,43 @@ class FeatureTableClient:
         # 检查是否已经发布,查看Redis中是否有值
         try:
             # 获取离线表的列名
-            online_data = self.
-                table_name=table_name,
-                database_name=database_name,
-                online_config=online_config)
+            online_data = self._feast_client.get_feature_view(full_table_name)
         except Exception as e:
             print(f"Failed to get online table view for table '{full_table_name}': {str(e)}")
         else:
-            if online_data:
+            if online_data.online:
                 raise ValueError(f"Table '{full_table_name}' has already been published")
 
         # 配置周期性参数
         if is_cycle:
-            if not isinstance(cycle_obj, TaskSchedulerConfiguration):
+            if not isinstance(cycle_obj, models.TaskSchedulerConfiguration):
                 raise ValueError("cycle_obj must be a TaskSchedulerConfiguration object when is_cycle is True")
 
             cycle_obj.CycleType = "CRONTAB_CYCLE"
         else:
-            if isinstance(cycle_obj, TaskSchedulerConfiguration):
+            if isinstance(cycle_obj, models.TaskSchedulerConfiguration):
                 cycle_obj.CycleType = "ONEOFF_CYCLE"
             else:
-                cycle_obj = TaskSchedulerConfiguration()
+                cycle_obj = models.TaskSchedulerConfiguration()
                 cycle_obj.CycleType = "ONEOFF_CYCLE"
             # 设置默认当前时间延后1分钟
             cycle_obj.CrontabExpression = (datetime.datetime.now() + datetime.timedelta(minutes=3)).strftime(
                 "%M %H %d %m %w ? %y")
 
         if is_use_default_online:
-            online_feature_config = OnlineFeatureConfiguration()
+            online_feature_config = models.OnlineFeatureConfiguration()
             online_feature_config.UserDefault = True
         else:
             if not isinstance(online_config, RedisStoreConfig):
                 raise ValueError("online_config must be a RedisStoreConfig object when is_use_default_online is False")
 
-            online_feature_config = OnlineFeatureConfiguration()
+            online_feature_config = models.OnlineFeatureConfiguration()
             online_feature_config.UserDefault = False
             online_feature_config.Host = online_config.host
             online_feature_config.Port = online_config.port
             online_feature_config.DB = online_config.db
 
-        offline_feature_config = OfflineFeatureConfiguration()
+        offline_feature_config = models.OfflineFeatureConfiguration()
         offline_feature_config.DatabaseName = env_utils.get_database_name(database_name)
         offline_feature_config.TableName = table_name
 
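For callers, the visible change in `publish_table` is that the scheduler and online-store configuration classes are now reached through `wedata.common.cloud_sdk_client.models`. The sketch below uses only the fields this diff shows (`CycleType`, `CrontabExpression`); other attributes and the exact crontab format accepted by the WeData scheduler are not visible here, so treat it as a hedged example. It also spells out what the one-off default produces: `strftime("%M %H %d %m %w ? %y")` on "now + 3 minutes" yields a minute/hour/day/month/weekday/year string.

    import datetime
    from wedata.common.cloud_sdk_client import models

    # Periodic publish: the caller supplies the schedule; CRONTAB_CYCLE is set internally.
    cycle = models.TaskSchedulerConfiguration()
    cycle.CrontabExpression = "0 2 * * * ? *"   # illustrative value only; check the scheduler docs
    # fs.publish_table("user_features", data_source_name="my_datasource",
    #                  is_cycle=True, cycle_obj=cycle)   # 'fs' and both names are hypothetical

    # One-off default built by publish_table itself, e.g. for 2024-05-06 14:30 (a Monday):
    ts = datetime.datetime(2024, 5, 6, 14, 30) + datetime.timedelta(minutes=3)
    print(ts.strftime("%M %H %d %m %w ? %y"))   # -> "33 14 06 05 1 ? 24"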
@@ -902,7 +857,7 @@ class FeatureTableClient:
         offline_feature_config.DatasourceType = env_utils.get_engine_type()
         offline_feature_config.EngineName = env_utils.get_engine_name()
 
-        api_requests = CreateOnlineFeatureTableRequest()
+        api_requests = models.CreateOnlineFeatureTableRequest()
         api_requests.OfflineFeatureConfiguration = offline_feature_config
         api_requests.OnlineFeatureConfiguration = online_feature_config
         api_requests.TaskSchedulerConfiguration = cycle_obj
@@ -910,11 +865,11 @@ class FeatureTableClient:
         region = env_utils.get_region()
         if not os.environ.get("RESOURCE_GROUP_ID", ""):
             res_group_item = _get_default_resource_group(
-                api_requests.ProjectId,
+                api_requests.ProjectId, self.__cloud_secret_id, self.__cloud_secret_key, region)
             api_requests.ResourceGroupId = res_group_item.ExecutorGroupId
         else:
             api_requests.ResourceGroupId = os.environ.get("RESOURCE_GROUP_ID")
-        client = FeatureCloudSDK(secret_id=
+        client = FeatureCloudSDK(secret_id=self.__cloud_secret_id, secret_key=self.__cloud_secret_key, region=region)
         resp = client.CreateOnlineFeatureTable(api_requests)
         if cycle_obj.CycleType == "ONEOFF_CYCLE":
             print(f"publish online task create success. it will be execute after 3 min. {resp.Data.OnlineTableId} {resp.Data.OfflineTableId} ")
@@ -955,10 +910,23 @@ class FeatureTableClient:
     def _check_table_exists(self, full_table_name: str) -> bool:
         return common_utils.check_spark_table_exists(self._spark, full_table_name)
 
+    def _get_offline_default_database(self) -> Optional[models.FeatureStoreDatabase]:
+        client = FeatureCloudSDK(secret_id=self.__cloud_secret_id, secret_key=self.__cloud_secret_key,
+                                 region=self.__region)
+        req = models.DescribeFeatureStoreDatabasesRequest()
+        req.ProjectId = self.__project
+        rsp = client.DescribeFeatureStoreDatabases(req)
+        if len(rsp.Data) == 0:
+            return None
+        for item in rsp.Data:
+            if item.OnlineMode == 0 and item.IsDefault == 1:
+                return item
+        return None
+
 
 def _get_default_resource_group(project_id: str, secret_id: str, secret_key: str, region: str):
     client = FeatureCloudSDK(secret_id=secret_id, secret_key=secret_key, region=region)
-    request = DescribeNormalSchedulerExecutorGroupsRequest()
+    request = models.DescribeNormalSchedulerExecutorGroupsRequest()
     request.ProjectId = project_id
     resp = client.DescribeNormalSchedulerExecutorGroups(request)
     # 默认取第一个健康可用的资源组进行执行
@@ -972,7 +940,7 @@ def _refresh_table(project_id: str, secret_id: str, secret_key: str, region: str
                    action: str, database_name: str, data_source_name: str, data_source_type: str,
                    engine_name: str, is_try: bool):
    client = FeatureCloudSDK(secret_id=secret_id, secret_key=secret_key, region=region)
-    request = RefreshFeatureTableRequest()
+    request = models.RefreshFeatureTableRequest()
     request.ProjectId = project_id
     request.TableName = table_name
     request.DatabaseName = database_name
wedata/feature_store/training_set_client/training_set_client.py
CHANGED

@@ -1,7 +1,7 @@
 import logging
 import os
 from types import ModuleType
-from typing import Any, List, Optional,
+from typing import Any, List, Optional, Union, Dict
 
 import mlflow
 from mlflow.models import Model
@@ -9,18 +9,17 @@ from mlflow.utils.file_utils import TempDir, read_yaml
 from pyspark.sql import DataFrame
 from pyspark.sql.functions import struct
 
-from wedata.
-from wedata.
-from wedata.
-from wedata.
-from wedata.
-from wedata.feature_store.entities.training_set import TrainingSet
+from wedata.common.constants import constants
+from wedata.common.entities.feature_function import FeatureFunction
+from wedata.common.entities.feature_lookup import FeatureLookup
+from wedata.common.entities.feature_spec import FeatureSpec
+from wedata.common.entities.training_set import TrainingSet
 from wedata.feature_store.mlflow_model import _FeatureStoreModelWrapper
-from wedata.
-from wedata.
-from wedata.
+from wedata.common.spark_client import SparkClient
+from wedata.common.utils import validation_utils, common_utils, training_set_utils
+from wedata.common.entities.feature_table import FeatureTable
 
-from wedata.
+from wedata.common.constants.constants import (
     _NO_RESULT_TYPE_PASSED,
     _USE_SPARK_NATIVE_JOIN,
     MODEL_DATA_PATH_ROOT,
@@ -28,8 +27,8 @@ from wedata.feature_store.constants.constants import (
     _PREBUILT_ENV_URI
 )
 
-from wedata.
-from wedata.
+from wedata.common.utils import uc_utils
+from wedata.common.utils.signature_utils import get_mlflow_signature_from_feature_spec, \
     drop_signature_inputs_and_invalid_params
 
 _logger = logging.getLogger(__name__)
@@ -389,16 +388,13 @@ class TrainingSetClient:
             "The provided DataFrame for scoring must have unique column names. Found duplicates {}.",
         )
         artifact_path = os.path.join("artifacts", MODEL_DATA_PATH_ROOT)
-        # print(f"artifact_path: {artifact_path}")
         with (TempDir() as tmp_location):
             local_path = (
                 local_uri
                 if local_uri
                 else common_utils.download_model_artifacts(model_uri, tmp_location.path())
            )
-            # print(f"wedata local_path:{local_path}")
             model_data_path = os.path.join(local_path, artifact_path)
-            # print(f"artifact_path: {artifact_path}")
 
             # Augment local workspace metastore tables from 2L to 3L,
             # this will prevent us from erroneously reading data from other catalogs
@@ -424,9 +420,7 @@ class TrainingSetClient:
 
         # Validate that columns needed for joining feature tables exist and are not duplicates.
         feature_input_keys = []
-        print("====>timestamp_key:", timestamp_key)
         for fci in feature_spec.feature_column_infos:
-            print("====>fci:", fci.lookup_key)
             feature_input_keys.extend([k for k in fci.lookup_key])
 
         on_demand_input_names = uc_utils.get_unique_list_order(
@@ -440,18 +434,13 @@ class TrainingSetClient:
         source_data_names = [
             sdci.name for sdci in feature_spec.source_data_column_infos
         ]
-        # print(f"wedata source_data_names:{source_data_names}")
-
-        print("===>source_data_names:", source_data_names)
 
         feature_output_names = [
             fci.output_name for fci in feature_spec.feature_column_infos
         ]
-        print("====>feature_output_names:", feature_output_names)
         on_demand_output_names = [
             odci.output_name for odci in feature_spec.on_demand_column_infos
         ]
-        print("====>on_demand_output_names:", on_demand_output_names)
         all_output_names = set(
             source_data_names + feature_output_names + on_demand_output_names
         )
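The scoring path loses its ad-hoc `print("====>...")` diagnostics without a direct replacement in these hunks. The module already defines `_logger = logging.getLogger(__name__)` (visible in the import hunk above), so an equivalent, switchable diagnostic would look like the sketch below; this is a suggestion, not code from the package:

    import logging

    _logger = logging.getLogger(__name__)

    def _log_join_inputs(feature_spec):
        # Debug-level logging keeps the diagnostics available without polluting stdout;
        # enable by lowering the logger's level to logging.DEBUG.
        for fci in feature_spec.feature_column_infos:
            _logger.debug("feature lookup key: %s", fci.lookup_key)
        _logger.debug("source data columns: %s",
                      [sdci.name for sdci in feature_spec.source_data_column_infos])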
wedata/tempo/interpol.py
CHANGED
@@ -23,7 +23,7 @@ class Interpolation:
         """
         Validate if the fill provided is within the allowed list of values.
 
-        :param
+        :param method: Fill type e.g. "zero", "null", "bfill", "ffill", "linear"
         """
         if method not in method_options:
             raise ValueError(
@@ -43,8 +43,8 @@ class Interpolation:
 
         :param df: DataFrame to be validated
         :param partition_cols: Partition columns to be validated
-        :param target_col: Target column to be validated
         :param ts_col: Timestamp column to be validated
+        :param ts_col_dtype: Timestamp column type
         """
 
         if partition_cols is not None:
tencent_wedata_feature_engineering_dev-0.1.48.dist-info/RECORD

@@ -1,66 +0,0 @@
-wedata/__init__.py,sha256=GYxqkkgH0oH4QtNiOCZHuGkc0sSH1LgEqmhSX6sB4So,200
-wedata/feature_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/client.py,sha256=B6fy-PGJZsEHGPE8tDmrTolyioJ_-Po2DK3p_HCW7Sw,19552
-wedata/feature_store/mlflow_model.py,sha256=OCUuccOoO0NXWSzIPoGeL03Ha1Q3aQTJW2RlJrTCmzc,554
-wedata/feature_store/cloud_sdk_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/cloud_sdk_client/client.py,sha256=B7nCQ_MvbLP4ieT7rsa32FHws2fOG4VQZT5lmQ3Cvzk,4914
-wedata/feature_store/cloud_sdk_client/models.py,sha256=7_QUq0kZcrcclRMsIYFoqBrlzVwaHoVY-yU5SHIrJWM,19789
-wedata/feature_store/cloud_sdk_client/utils.py,sha256=xwvXJpk2RXbJtgOaXCZQbGRrlzcTRzv27yQFxKp_X84,970
-wedata/feature_store/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/common/log/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/common/log/logger.py,sha256=c45DlIbIuwRP2na3ZXsncpHV5KUltqfyKzIgG9GG3g4,1151
-wedata/feature_store/common/protos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/common/protos/feature_store_pb2.py,sha256=oMIUGGeGNP84g_nFqOQwTXjV1GiU2ehSOy7CyFu2__g,4207
-wedata/feature_store/common/store_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/common/store_config/redis.py,sha256=9R5npM2s1u0o9IakmpbRsFdJC0vNar_uvA62OLWuXBs,1145
-wedata/feature_store/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/constants/constants.py,sha256=M0UKZSnpM7x5RYDvfesyF422zg82JJe4JsHUuUDiUF4,1959
-wedata/feature_store/constants/engine_types.py,sha256=42mI-kNDDtoA4_I3iqDe4FkF2M2l_Bt4Q1V6WUB-_k0,921
-wedata/feature_store/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/entities/column_info.py,sha256=-AR6EKHwgoqIkRHFyguxVEtnYt6fvusWHkEjF4kvS0A,5141
-wedata/feature_store/entities/environment_variables.py,sha256=ZEFml5H9MQuzBKM074mUrFYu-Sga4Knmxqiwpke2WGc,1679
-wedata/feature_store/entities/feature.py,sha256=wX8fTBlJq3GYdj9rrBDCY3kFgcVBBAiOOZdxEhnQkNQ,1241
-wedata/feature_store/entities/feature_column_info.py,sha256=ZAS_I-MDg2ofCv3nwYvGCQrrpEljzrh_L1D-gqOV_mM,2407
-wedata/feature_store/entities/feature_function.py,sha256=R17INrCE-U_Uj9KLbFz69aYlOkTETTwQHMMo470F4lQ,1865
-wedata/feature_store/entities/feature_lookup.py,sha256=UYmYCzkQ1_KuooybS3F-7HDcjBMPZ72InL06UTHbEtw,8749
-wedata/feature_store/entities/feature_spec.py,sha256=Z2SXE_LObjNY3q5yBVKPXGTUiMZy7zaI6-ZbAoFlwG8,21769
-wedata/feature_store/entities/feature_spec_constants.py,sha256=YWDBfRiNDe6fUJFUBo3V4WYg2xsljoPAE-ZejfFZCgM,785
-wedata/feature_store/entities/feature_table.py,sha256=nHCCd7WUryROt9oTJpYkT-KiGbKcQd7BEE9L2_1dhYw,4107
-wedata/feature_store/entities/feature_table_info.py,sha256=yJ1P3AYaPiHW6ehCbMWhndzguHJqJKWfeFwYjwTLt2U,1481
-wedata/feature_store/entities/function_info.py,sha256=yDwIzTrBR-ECWubgeoy48SYZfdY7P0JcraZnWGCW0ag,2752
-wedata/feature_store/entities/on_demand_column_info.py,sha256=a44ep-f3FOruWNXl3c8v7733rNuoKXJaHTv1aqF905s,1739
-wedata/feature_store/entities/source_data_column_info.py,sha256=FyBmBPUSvc2S2OPFTvsQf2AdS-KFGkYBmd4yL_Vur8M,702
-wedata/feature_store/entities/training_set.py,sha256=ylt1h6Z_xU8hKYvnvd80CeewTGSN68-_kvFpoliwH7s,5679
-wedata/feature_store/feast_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/feast_client/feast_client.py,sha256=mCv-OiKehfgcOJhJV0wXMRs5d7e2zEBYmVmDguk0rxU,20728
-wedata/feature_store/feature_table_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/feature_table_client/feature_table_client.py,sha256=qyr-jS-nIyphFVZXcUXV_HSfAu-8c19f0b8iG5rYsl8,42669
-wedata/feature_store/spark_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/spark_client/spark_client.py,sha256=aTenEqfZoJYMrph98qjNHZ-H4dgNKnMaH14st8bCVRQ,11797
-wedata/feature_store/training_set_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/training_set_client/training_set_client.py,sha256=Ja_W1SKWKueW6wmwDx-623mfpwKQICm6A-ec_jgOFt4,23707
-wedata/feature_store/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/utils/common_utils.py,sha256=vkpoXxZTd6S-2MgdyTQZ6P_ckdqNSK50ECuVBG2BwfI,12314
-wedata/feature_store/utils/env_utils.py,sha256=kMCl6gyqDxjN5IZ7-wZMR0R4YY-Evh_7NHxugSYPWQc,2736
-wedata/feature_store/utils/feature_lookup_utils.py,sha256=mNV6RhBdpv1iTZduCA9YwXwkeJCwU5MFQ1MkFeD9IhY,22003
-wedata/feature_store/utils/feature_spec_utils.py,sha256=j8t-zel2_r8Q9m88BmFKkHMdkGNIduWJB-28OZDASRY,11613
-wedata/feature_store/utils/feature_utils.py,sha256=KKq28bVB_lCuhnR9Hk6JegJBOVgcelWlvrRM-F9onkA,2796
-wedata/feature_store/utils/on_demand_utils.py,sha256=pazZRG5c0Se08MV_inBddIeX4Q9xlVN_H9SC_WK3xzs,4260
-wedata/feature_store/utils/schema_utils.py,sha256=y6EYY1pUxjVg6MP4C7avdW8ZEBBaDo1YTV2CmPF4i8o,4491
-wedata/feature_store/utils/signature_utils.py,sha256=SZFufd19m0jmGnOLmAl3JPKZC-qHq-wQezh6G7HOMfc,7773
-wedata/feature_store/utils/topological_sort.py,sha256=ebzKxmxeCLk9seB1zR0ASCGXsZsa-DjxJeTc4KUadtg,6475
-wedata/feature_store/utils/training_set_utils.py,sha256=MYsPZS1d9HKswHgjgxD8K7H9N3dWPyyTTx20Mkp4PVU,22497
-wedata/feature_store/utils/uc_utils.py,sha256=5jngdLT8quP1lfGHN_SSFQQlcOh_sUB9M1varCgdFwg,11436
-wedata/feature_store/utils/validation_utils.py,sha256=lJe6HCg5v5CZxH_pvT-vpGhCpo66LT2erXraHE2T0iI,2584
-wedata/tempo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/tempo/interpol.py,sha256=3JF8dwcdKv2o10FN45aefgvxR5DjlR6FJAXrbAiGCro,16423
-wedata/tempo/intervals.py,sha256=L2ao7LlgQmfDTFwnBoFLXeuygSvwtIKXL52thiD80Yw,44078
-wedata/tempo/io.py,sha256=KWIn6IBSkuBxr8QCcpdZ6NFX_49-8UQdGftmZgs_ujw,1872
-wedata/tempo/ml.py,sha256=WtGa2szn6PditvZsTZoxo7wFDe4k1SRoMZ-jgNGIjvE,4323
-wedata/tempo/resample.py,sha256=h81RVVmCl4ect-YKE-KZZHPDi1rGI3sh-YIb-Btz0ck,9698
-wedata/tempo/tsdf.py,sha256=S4lZfxhSRFiezYoYS6gvGsl1mZA3zp-MWEKFHYZpDg0,70968
-wedata/tempo/utils.py,sha256=I9I6l2DMwUoY213L04Yc1UR_zTWgSkj1BVo4ZwzQd4Y,7977
-tencent_wedata_feature_engineering_dev-0.1.48.dist-info/METADATA,sha256=-A-1H2urk9u4M9sEoQYri1xCeVtH3lFhUrsajCGb9tU,582
-tencent_wedata_feature_engineering_dev-0.1.48.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-tencent_wedata_feature_engineering_dev-0.1.48.dist-info/top_level.txt,sha256=Xa0v1rh__RvfVTVDirW5r5UBKg7ZO_iuTeXfp8MNo2A,7
-tencent_wedata_feature_engineering_dev-0.1.48.dist-info/RECORD,,