tencent-wedata-feature-engineering-dev 0.1.42__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. {tencent_wedata_feature_engineering_dev-0.1.42.dist-info → tencent_wedata_feature_engineering_dev-0.2.5.dist-info}/METADATA +14 -3
  2. tencent_wedata_feature_engineering_dev-0.2.5.dist-info/RECORD +78 -0
  3. {tencent_wedata_feature_engineering_dev-0.1.42.dist-info → tencent_wedata_feature_engineering_dev-0.2.5.dist-info}/WHEEL +1 -1
  4. wedata/__init__.py +1 -1
  5. wedata/common/base_table_client/__init__.py +1 -0
  6. wedata/common/base_table_client/base.py +58 -0
  7. wedata/common/cloud_sdk_client/__init__.py +2 -0
  8. wedata/{feature_store → common}/cloud_sdk_client/client.py +56 -12
  9. wedata/{feature_store → common}/cloud_sdk_client/models.py +212 -37
  10. wedata/{feature_store → common}/cloud_sdk_client/utils.py +14 -0
  11. wedata/{feature_store → common}/constants/constants.py +3 -2
  12. wedata/common/constants/engine_types.py +34 -0
  13. wedata/{feature_store → common}/entities/column_info.py +6 -5
  14. wedata/{feature_store → common}/entities/feature_column_info.py +2 -1
  15. wedata/{feature_store → common}/entities/feature_lookup.py +1 -1
  16. wedata/{feature_store → common}/entities/feature_spec.py +9 -9
  17. wedata/{feature_store → common}/entities/feature_table_info.py +1 -1
  18. wedata/{feature_store → common}/entities/function_info.py +2 -1
  19. wedata/{feature_store → common}/entities/on_demand_column_info.py +2 -1
  20. wedata/{feature_store → common}/entities/source_data_column_info.py +3 -1
  21. wedata/{feature_store → common}/entities/training_set.py +6 -6
  22. wedata/common/feast_client/__init__.py +1 -0
  23. wedata/{feature_store → common}/feast_client/feast_client.py +3 -4
  24. wedata/common/log/__init__.py +1 -0
  25. wedata/common/log/logger.py +44 -0
  26. wedata/common/spark_client/__init__.py +1 -0
  27. wedata/{feature_store → common}/spark_client/spark_client.py +6 -9
  28. wedata/{feature_store → common}/utils/common_utils.py +7 -9
  29. wedata/{feature_store → common}/utils/env_utils.py +31 -10
  30. wedata/{feature_store → common}/utils/feature_lookup_utils.py +6 -6
  31. wedata/{feature_store → common}/utils/feature_spec_utils.py +13 -8
  32. wedata/{feature_store → common}/utils/feature_utils.py +5 -5
  33. wedata/{feature_store → common}/utils/on_demand_utils.py +5 -4
  34. wedata/{feature_store → common}/utils/schema_utils.py +1 -1
  35. wedata/{feature_store → common}/utils/signature_utils.py +4 -4
  36. wedata/{feature_store → common}/utils/training_set_utils.py +13 -13
  37. wedata/{feature_store → common}/utils/uc_utils.py +1 -1
  38. wedata/feature_engineering/__init__.py +1 -0
  39. wedata/feature_engineering/client.py +417 -0
  40. wedata/feature_engineering/ml_training_client/ml_training_client.py +569 -0
  41. wedata/feature_engineering/mlflow_model.py +9 -0
  42. wedata/feature_engineering/table_client/__init__.py +0 -0
  43. wedata/feature_engineering/table_client/table_client.py +548 -0
  44. wedata/feature_store/client.py +13 -16
  45. wedata/feature_store/constants/engine_types.py +8 -30
  46. wedata/feature_store/feature_table_client/feature_table_client.py +98 -108
  47. wedata/feature_store/training_set_client/training_set_client.py +14 -17
  48. wedata/tempo/interpol.py +2 -2
  49. tencent_wedata_feature_engineering_dev-0.1.42.dist-info/RECORD +0 -64
  50. {tencent_wedata_feature_engineering_dev-0.1.42.dist-info → tencent_wedata_feature_engineering_dev-0.2.5.dist-info}/top_level.txt +0 -0
  51. /wedata/{feature_store/cloud_sdk_client → common}/__init__.py +0 -0
  52. /wedata/{feature_store/common/protos → common/constants}/__init__.py +0 -0
  53. /wedata/{feature_store → common}/entities/__init__.py +0 -0
  54. /wedata/{feature_store → common}/entities/environment_variables.py +0 -0
  55. /wedata/{feature_store → common}/entities/feature.py +0 -0
  56. /wedata/{feature_store → common}/entities/feature_function.py +0 -0
  57. /wedata/{feature_store → common}/entities/feature_spec_constants.py +0 -0
  58. /wedata/{feature_store → common}/entities/feature_table.py +0 -0
  59. /wedata/{feature_store/feast_client → common/protos}/__init__.py +0 -0
  60. /wedata/{feature_store/common → common}/protos/feature_store_pb2.py +0 -0
  61. /wedata/{feature_store/spark_client → common/utils}/__init__.py +0 -0
  62. /wedata/{feature_store → common}/utils/topological_sort.py +0 -0
  63. /wedata/{feature_store → common}/utils/validation_utils.py +0 -0
  64. /wedata/{feature_store/utils → feature_engineering/ml_training_client}/__init__.py +0 -0
@@ -2,7 +2,7 @@
2
2
  特征表操作相关工具方法
3
3
  """
4
4
  import json
5
- from typing import Union, List, Dict, Optional, Sequence, Any
5
+ from typing import Union, List, Dict, Optional, Any
6
6
 
7
7
  import tencentcloud.common.exception
8
8
  from pyspark.sql import DataFrame, SparkSession
@@ -10,23 +10,23 @@ from pyspark.sql.streaming import StreamingQuery
10
10
  from pyspark.sql.types import StructType
11
11
  import os
12
12
  import datetime
13
- from wedata.feature_store.constants.constants import (
13
+ from wedata.common.constants.constants import (
14
14
  APPEND, DEFAULT_WRITE_STREAM_TRIGGER, FEATURE_TABLE_KEY,
15
15
  FEATURE_TABLE_VALUE, FEATURE_TABLE_PROJECT, FEATURE_TABLE_TIMESTAMP,
16
16
  FEATURE_TABLE_BACKUP_PRIMARY_KEY, FEATURE_DLC_TABLE_PRIMARY_KEY)
17
- from wedata.feature_store.constants.engine_types import EngineTypes
17
+ from wedata.common.constants.engine_types import EngineTypes
18
+ from wedata.common.log import get_logger
18
19
  from wedata.feature_store.common.store_config.redis import RedisStoreConfig
19
- from wedata.feature_store.entities.feature_table import FeatureTable
20
- from wedata.feature_store.spark_client.spark_client import SparkClient
21
- from wedata.feature_store.utils import common_utils, env_utils
22
- from wedata.feature_store.feast_client.feast_client import FeastClient
23
- from wedata.feature_store.cloud_sdk_client.models import (
24
- TaskSchedulerConfiguration, OnlineFeatureConfiguration, OfflineFeatureConfiguration,
25
- CreateOnlineFeatureTableRequest, DescribeNormalSchedulerExecutorGroupsRequest, RefreshFeatureTableRequest)
26
- from wedata.feature_store.cloud_sdk_client.client import FeatureCloudSDK
20
+ from wedata.common.entities.feature_table import FeatureTable
21
+ from wedata.common.spark_client import SparkClient
22
+ from wedata.common.utils import common_utils, env_utils
23
+ from wedata.common.feast_client.feast_client import FeastClient
24
+ from wedata.common.cloud_sdk_client import models
25
+ from wedata.common.cloud_sdk_client import FeatureCloudSDK
26
+ from wedata.common.base_table_client import AbstractBaseTableClient
27
27
 
28
28
 
29
- class FeatureTableClient:
29
+ class FeatureTableClient(AbstractBaseTableClient):
30
30
  """特征表操作类"""
31
31
 
32
32
  def __init__(
@@ -44,6 +44,11 @@ class FeatureTableClient:
44
44
  self.__cloud_secret_id, self.__cloud_secret_key = env_utils.get_cloud_secret()
45
45
  self.__project = env_utils.get_project_id()
46
46
  self.__region = env_utils.get_region()
47
+ self.__logger = get_logger()
48
+ default_online_table = self._get_offline_default_database()
49
+ if default_online_table:
50
+ env_utils.set_default_database(default_online_table.DatabaseName)
51
+
47
52
 
48
53
  @property
49
54
  def cloud_secret_id(self) -> str:
@@ -77,51 +82,6 @@ class FeatureTableClient:
77
82
  def region(self) -> str:
78
83
  return self.__region
79
84
 
80
- @staticmethod
81
- def _normalize_params(
82
- param: Optional[Union[str, Sequence[str]]],
83
- default_type: type = list
84
- ) -> list:
85
- """统一处理参数标准化"""
86
- if param is None:
87
- return default_type()
88
- return list(param) if isinstance(param, Sequence) else [param]
89
-
90
- @staticmethod
91
- def _validate_schema(df: DataFrame, schema: StructType):
92
- """校验DataFrame和schema的有效性和一致性"""
93
- # 检查是否同时为空
94
- if df is None and schema is None:
95
- raise ValueError("Either DataFrame or schema must be provided")
96
-
97
- # 检查schema匹配
98
- if df is not None and schema is not None:
99
- df_schema = df.schema
100
- if df_schema != schema:
101
- diff_fields = set(df_schema.fieldNames()).symmetric_difference(set(schema.fieldNames()))
102
- raise ValueError(
103
- f"DataFrame schema does not match. Differences: "
104
- f"{diff_fields if diff_fields else 'field type mismatch'}"
105
- )
106
-
107
- @staticmethod
108
- def _validate_key_conflicts(primary_keys: List[str], timestamp_keys: str):
109
- """校验主键与时间戳键是否冲突"""
110
- if timestamp_keys in primary_keys:
111
- raise ValueError(f"Timestamp keys conflict with primary keys: {timestamp_keys}")
112
-
113
- @staticmethod
114
- def _validate_key_exists(primary_keys: List[str], timestamp_keys: str):
115
- """校验主键与时间戳键是否存在"""
116
- if not primary_keys:
117
- raise ValueError("Primary keys cannot be empty")
118
- if not timestamp_keys:
119
- raise ValueError("Timestamp keys cannot be empty")
120
-
121
- @staticmethod
122
- def _escape_sql_value(value: str) -> str:
123
- """转义SQL值中的特殊字符"""
124
- return value.replace("'", "''")
125
85
 
126
86
  def create_table(
127
87
  self,
@@ -165,6 +125,10 @@ class FeatureTableClient:
165
125
  primary_keys = self._normalize_params(primary_keys)
166
126
  partition_columns = self._normalize_params(partition_columns)
167
127
 
128
+ assert self._check_sequence_element_type(primary_keys, str), "primary_keys must be a list of strings"
129
+ assert self._check_sequence_element_type(partition_columns, str), "partition_columns must be a list of strings"
130
+ assert isinstance(timestamp_key, str), "timestamp key must be string"
131
+
168
132
  # 元数据校验
169
133
  self._validate_schema(df, schema)
170
134
  self._validate_key_exists(primary_keys, timestamp_key)
@@ -301,7 +265,7 @@ class FeatureTableClient:
301
265
  raise ValueError(f"Engine type {engine_type} is not supported")
302
266
 
303
267
  # 打印sql
304
- print(f"create table ddl: {ddl}\n")
268
+ self.__logger.info(f"create table ddl: {ddl}\n")
305
269
 
306
270
  # 执行DDL
307
271
  try:
@@ -311,8 +275,6 @@ class FeatureTableClient:
311
275
  except Exception as e:
312
276
  raise ValueError(f"Failed to create table: {str(e)}") from e
313
277
 
314
- print("async table info to feast")
315
-
316
278
  self._feast_client.create_table(
317
279
  table_name=table_name,
318
280
  primary_keys=primary_keys,
@@ -323,7 +285,7 @@ class FeatureTableClient:
323
285
  description=description
324
286
  )
325
287
 
326
- print(f"create table {name} done")
288
+ self.__logger.info(f"Table '{name}' created successfully. Starting web synchronization.")
327
289
 
328
290
  try:
329
291
  self._sync_table_info(table_name=name, action_name="create",
@@ -331,7 +293,8 @@ class FeatureTableClient:
331
293
  data_source_name=data_source_name, engine_name=env_utils.get_engine_name(),
332
294
  is_try=False)
333
295
  except tencentcloud.common.exception.TencentCloudSDKException as e:
334
- raise RuntimeError(f"Table '{name}' is can't create. {str(e)}")
296
+ raise RuntimeError(f"Failed to synchronize web data for table '{name}'. "
297
+ f"Please manually operate on the web page. Error: {str(e)}")
335
298
 
336
299
  # 构建并返回FeatureTable对象
337
300
  return FeatureTable(
@@ -444,6 +407,10 @@ class FeatureTableClient:
444
407
  common_utils.validate_table_name(name)
445
408
  common_utils.validate_database(database_name)
446
409
 
410
+ if primary_keys:
411
+ assert self._check_sequence_element_type(primary_keys, str), "primary_keys must be a list of strings"
412
+ assert isinstance(timestamp_key, str), "timestamp key must be string"
413
+
447
414
  # 构建完整表名
448
415
  table_name = common_utils.build_full_table_name(name, database_name)
449
416
 
@@ -456,11 +423,12 @@ class FeatureTableClient:
456
423
 
457
424
  # 检查Primary Key和Timestamp Key是否为空
458
425
  if engine_type == engine_type.ICEBERG_ENGINE and props.get("format-version", "") == "2":
459
- if props.get('dlc.ao.data.govern.sorted.keys', "") == "":
460
- raise ValueError(
461
- "table dlc.ao.data.govern.sorted.keys is empty. you must set dlc.ao.data.govern.sorted.keys")
462
- else:
463
- primary_keys = props.get('dlc.ao.data.govern.sorted.keys').split(",")
426
+ if not primary_keys:
427
+ if props.get('dlc.ao.data.govern.sorted.keys', "") == "":
428
+ raise ValueError(
429
+ "table dlc.ao.data.govern.sorted.keys is empty. you must set dlc.ao.data.govern.sorted.keys")
430
+ else:
431
+ primary_keys = props.get('dlc.ao.data.govern.sorted.keys').split(",")
464
432
  elif engine_type == engine_type.HIVE_ENGINE:
465
433
  if not primary_keys:
466
434
  raise ValueError("primary_keys cannot be None for HIVE_ENGINE")
@@ -513,27 +481,28 @@ class FeatureTableClient:
513
481
 
514
482
  try:
515
483
  self._sync_table_info(table_name=name, action_name="create",
516
- database_name=env_utils.get_database_name(database_name),
517
- data_source_name=data_source_name, engine_name=env_utils.get_engine_name(), is_try=True)
484
+ database_name=env_utils.get_database_name(database_name),
485
+ data_source_name=data_source_name, engine_name=env_utils.get_engine_name(), is_try=True)
518
486
  except tencentcloud.common.exception.TencentCloudSDKException as e:
519
487
  raise RuntimeError(f"Table '{name}' is can't create. {str(e)}")
520
488
 
521
489
  # 执行修改
522
- print("alter table sql", alter_sql)
490
+ self.__logger.info(f"alter table sql: \n {alter_sql}")
523
491
  self._spark.sql(alter_sql)
524
- print("Execute sql done, start sync table info to feast")
492
+ self.__logger.debug("Execute sql done, start sync table info to feast")
525
493
  self._feast_client.alter_table(full_table_name=table_name, primary_keys=primary_keys,
526
494
  timestamp_key=timestamp_key)
527
- print(f"Successfully register table '{table_name}'")
495
+ self.__logger.info(f"Successfully register table '{table_name}'. Starting web synchronization.")
528
496
 
529
497
  try:
530
498
  self._sync_table_info(table_name=name, action_name="create",
531
- database_name=env_utils.get_database_name(database_name),
532
- data_source_name=data_source_name, engine_name=env_utils.get_engine_name(), is_try=False)
499
+ database_name=env_utils.get_database_name(database_name),
500
+ data_source_name=data_source_name, engine_name=env_utils.get_engine_name(), is_try=False)
533
501
  except tencentcloud.common.exception.TencentCloudSDKException as e:
534
- raise RuntimeError(f"sync table info failed. you need to sync table info manually. {str(e)}")
535
- except ValueError as e:
536
- raise # 直接抛出已知的ValueError
502
+ raise RuntimeError(f"Failed to synchronize web data for table '{name}'. "
503
+ f"Please manually operate on the web page. Error: {str(e)}")
504
+ except (ValueError, RuntimeError):
505
+ raise
537
506
  except Exception as e:
538
507
  raise RuntimeError(f"Failed to modify properties for table '{table_name}': {str(e)}") from e
539
508
 
@@ -610,36 +579,34 @@ class FeatureTableClient:
610
579
  try:
611
580
  # 检查表是否存在
612
581
  if not self._check_table_exists(table_name):
613
- print(f"Table '{name}' does not exist")
582
+ self.__logger.error(f"Table '{name}' does not exist")
614
583
  return
615
584
 
616
- # 检查
617
- df = self._spark.table(tableName=table_name)
618
-
619
585
  try:
620
586
  feature_view = self._feast_client.get_feature_view(table_name)
621
587
  except Exception as e:
622
- print(f"Table '{name}' is not a feature table, skip delete. {str(e)}")
588
+ pass
589
+ # self.__logger.warning(f"Table '{name}' is not a feature table, skip delete. {str(e)}")
623
590
  else:
624
591
  if feature_view.online:
625
592
  raise ValueError(f"Table '{name}' has a online table, please call drop_online_table first")
626
593
  try:
627
594
  self._sync_table_info(table_name=name, action_name="delete",
628
- database_name=env_utils.get_database_name(database_name),
629
- data_source_name="", engine_name=env_utils.get_engine_name(), is_try=True)
595
+ database_name=env_utils.get_database_name(database_name),
596
+ data_source_name="", engine_name=env_utils.get_engine_name(), is_try=True)
630
597
  except tencentcloud.common.exception.TencentCloudSDKException as e:
631
598
  raise RuntimeError(f"Table '{name}' is can't delete. {str(e)}")
632
599
 
633
600
  # 执行删除
634
601
  self._spark.sql(f"DROP TABLE {table_name}")
635
- print(f"Table '{name}' dropped")
602
+ self.__logger.info(f"Table '{name}' dropped")
636
603
  try:
637
604
  self._feast_client.remove_offline_table(table_name=table_name)
638
605
  except Exception as e:
639
606
  raise
640
607
  # raise ValueError(f"Failed to delete table '{name}' in feast: {str(e)}")
641
608
  else:
642
- print(f"Table '{name}' removed from feast")
609
+ self.__logger.info(f"Table '{name}' removed from feast")
643
610
 
644
611
  try:
645
612
  self._sync_table_info(table_name=name, action_name="delete",
@@ -695,7 +662,7 @@ class FeatureTableClient:
695
662
  schema_name_list = [field.name for field in tmp_schema_list]
696
663
  schema = StructType(tmp_schema_list)
697
664
  for field in schema:
698
- print(f"{field.name} => {field.dataType}")
665
+ self.__logger.debug(f"translate {field.name} to feast Type: {field.dataType}")
699
666
 
700
667
  feast_client = FeastClient(offline_store=self._spark, online_store_config=online_config)
701
668
  # 构建离线表的entity的数据过滤
@@ -710,7 +677,7 @@ class FeatureTableClient:
710
677
  full_table_name=full_table_name,
711
678
  columns_name=columns_name_list,
712
679
  entity_rows=[result_row.asDict()])
713
- print("=====>read online dataframe:\n", online_view[schema_name_list])
680
+ self.__logger.debug(f"=====>read online dataframe:\n{online_view[schema_name_list]}")
714
681
  return self._spark.createDataFrame(online_view[schema_name_list], schema=schema, verifySchema=False)
715
682
  else:
716
683
  return self._spark.createDataFrame([])
@@ -719,7 +686,7 @@ class FeatureTableClient:
719
686
  full_table_name=full_table_name,
720
687
  columns_name=columns_name_list,
721
688
  entity_rows=entity_row)
722
- print("=====>read online dataframe:\n", online_view[schema_name_list])
689
+ self.__logger.debug(f"=====>read online dataframe:\n{online_view[schema_name_list]}")
723
690
  return self._spark.createDataFrame(online_view[schema_name_list], schema=schema, verifySchema=False)
724
691
 
725
692
  def get_table(
@@ -808,7 +775,9 @@ class FeatureTableClient:
808
775
 
809
776
  # 执行修改
810
777
  self._spark.sql(alter_sql)
811
- self._feast_client.modify_tags(table_name=table_name, tags=properties)
778
+ tbl_pro = self._spark.sql(f"SHOW TBLPROPERTIES {table_name}")
779
+ props = {row['key']: row['value'] for row in tbl_pro.collect()}
780
+ self._feast_client.modify_tags(table_name=table_name, tags=props)
812
781
  print(f"Successfully updated properties for table '{name}': {list(properties.keys())}")
813
782
 
814
783
  except ValueError as e:
@@ -816,9 +785,9 @@ class FeatureTableClient:
816
785
  except Exception as e:
817
786
  raise RuntimeError(f"Failed to modify properties for table '{name}': {str(e)}") from e
818
787
 
819
- def publish_table(self, table_name: str, data_source_name: str, cloud_secret_id: str, cloud_secret_key: str,
788
+ def publish_table(self, table_name: str, data_source_name: str,
820
789
  database_name: Optional[str] = None,
821
- is_cycle: bool = False, cycle_obj: TaskSchedulerConfiguration = None,
790
+ is_cycle: bool = False, cycle_obj: models.TaskSchedulerConfiguration = None,
822
791
  is_use_default_online: bool = True, online_config: RedisStoreConfig = None):
823
792
  """
824
793
  将离线特征表发布为在线特征表
@@ -841,46 +810,43 @@ class FeatureTableClient:
841
810
  # 检查是否已经发布,查看Redis中是否有值
842
811
  try:
843
812
  # 获取离线表的列名
844
- online_data = self._read_online_table(
845
- table_name=table_name,
846
- database_name=database_name,
847
- online_config=online_config)
813
+ online_data = self._feast_client.get_feature_view(full_table_name)
848
814
  except Exception as e:
849
815
  print(f"Failed to get online table view for table '{full_table_name}': {str(e)}")
850
816
  else:
851
- if online_data:
817
+ if online_data.online:
852
818
  raise ValueError(f"Table '{full_table_name}' has already been published")
853
819
 
854
820
  # 配置周期性参数
855
821
  if is_cycle:
856
- if not isinstance(cycle_obj, TaskSchedulerConfiguration):
822
+ if not isinstance(cycle_obj, models.TaskSchedulerConfiguration):
857
823
  raise ValueError("cycle_obj must be a TaskSchedulerConfiguration object when is_cycle is True")
858
824
 
859
825
  cycle_obj.CycleType = "CRONTAB_CYCLE"
860
826
  else:
861
- if isinstance(cycle_obj, TaskSchedulerConfiguration):
827
+ if isinstance(cycle_obj, models.TaskSchedulerConfiguration):
862
828
  cycle_obj.CycleType = "ONEOFF_CYCLE"
863
829
  else:
864
- cycle_obj = TaskSchedulerConfiguration()
830
+ cycle_obj = models.TaskSchedulerConfiguration()
865
831
  cycle_obj.CycleType = "ONEOFF_CYCLE"
866
832
  # 设置默认当前时间延后1分钟
867
833
  cycle_obj.CrontabExpression = (datetime.datetime.now() + datetime.timedelta(minutes=3)).strftime(
868
834
  "%M %H %d %m %w ? %y")
869
835
 
870
836
  if is_use_default_online:
871
- online_feature_config = OnlineFeatureConfiguration()
837
+ online_feature_config = models.OnlineFeatureConfiguration()
872
838
  online_feature_config.UserDefault = True
873
839
  else:
874
840
  if not isinstance(online_config, RedisStoreConfig):
875
841
  raise ValueError("online_config must be a RedisStoreConfig object when is_use_default_online is False")
876
842
 
877
- online_feature_config = OnlineFeatureConfiguration()
843
+ online_feature_config = models.OnlineFeatureConfiguration()
878
844
  online_feature_config.UserDefault = False
879
845
  online_feature_config.Host = online_config.host
880
846
  online_feature_config.Port = online_config.port
881
847
  online_feature_config.DB = online_config.db
882
848
 
883
- offline_feature_config = OfflineFeatureConfiguration()
849
+ offline_feature_config = models.OfflineFeatureConfiguration()
884
850
  offline_feature_config.DatabaseName = env_utils.get_database_name(database_name)
885
851
  offline_feature_config.TableName = table_name
886
852
 
@@ -891,7 +857,7 @@ class FeatureTableClient:
891
857
  offline_feature_config.DatasourceType = env_utils.get_engine_type()
892
858
  offline_feature_config.EngineName = env_utils.get_engine_name()
893
859
 
894
- api_requests = CreateOnlineFeatureTableRequest()
860
+ api_requests = models.CreateOnlineFeatureTableRequest()
895
861
  api_requests.OfflineFeatureConfiguration = offline_feature_config
896
862
  api_requests.OnlineFeatureConfiguration = online_feature_config
897
863
  api_requests.TaskSchedulerConfiguration = cycle_obj
@@ -899,11 +865,11 @@ class FeatureTableClient:
899
865
  region = env_utils.get_region()
900
866
  if not os.environ.get("RESOURCE_GROUP_ID", ""):
901
867
  res_group_item = _get_default_resource_group(
902
- api_requests.ProjectId, cloud_secret_id, cloud_secret_key, region)
868
+ api_requests.ProjectId, self.__cloud_secret_id, self.__cloud_secret_key, region)
903
869
  api_requests.ResourceGroupId = res_group_item.ExecutorGroupId
904
870
  else:
905
871
  api_requests.ResourceGroupId = os.environ.get("RESOURCE_GROUP_ID")
906
- client = FeatureCloudSDK(secret_id=cloud_secret_id, secret_key=cloud_secret_key, region=region)
872
+ client = FeatureCloudSDK(secret_id=self.__cloud_secret_id, secret_key=self.__cloud_secret_key, region=region)
907
873
  resp = client.CreateOnlineFeatureTable(api_requests)
908
874
  if cycle_obj.CycleType == "ONEOFF_CYCLE":
909
875
  print(f"publish online task create success. it will be execute after 3 min. {resp.Data.OnlineTableId} {resp.Data.OfflineTableId} ")
@@ -914,7 +880,18 @@ class FeatureTableClient:
914
880
  # 构建完整表名
915
881
  full_table_name = common_utils.build_full_table_name(table_name, database_name)
916
882
  feast_client = FeastClient(self._spark, online_config)
883
+ try:
884
+ self._sync_table_info(table_name=table_name, database_name=database_name, action_name="delete_online",
885
+ data_source_name="", engine_name=env_utils.get_engine_name(), is_try=True)
886
+ except Exception as e:
887
+ raise RuntimeError(f"drop online table failed. table_name: {full_table_name}. {str(e)}")
888
+
917
889
  feast_client.remove_online_table(full_table_name)
890
+ try:
891
+ self._sync_table_info(table_name=table_name, database_name=database_name, action_name="delete_online",
892
+ data_source_name="", engine_name=env_utils.get_engine_name(), is_try=False)
893
+ except Exception as e:
894
+ raise RuntimeError(f"drop online table failed. table_name: {full_table_name}. {str(e)}")
918
895
  print(f"drop online table success. table_name: {full_table_name}")
919
896
 
920
897
  def _get_table_primary_keys_and_timestamp_key(self, full_table_name: str) -> 'str, str':
@@ -933,10 +910,23 @@ class FeatureTableClient:
933
910
  def _check_table_exists(self, full_table_name: str) -> bool:
934
911
  return common_utils.check_spark_table_exists(self._spark, full_table_name)
935
912
 
913
+ def _get_offline_default_database(self) -> Optional[models.FeatureStoreDatabase]:
914
+ client = FeatureCloudSDK(secret_id=self.__cloud_secret_id, secret_key=self.__cloud_secret_key,
915
+ region=self.__region)
916
+ req = models.DescribeFeatureStoreDatabasesRequest()
917
+ req.ProjectId = self.__project
918
+ rsp = client.DescribeFeatureStoreDatabases(req)
919
+ if len(rsp.Data) == 0:
920
+ return None
921
+ for item in rsp.Data:
922
+ if item.OnlineMode == 0 and item.IsDefault == 1:
923
+ return item
924
+ return None
925
+
936
926
 
937
927
  def _get_default_resource_group(project_id: str, secret_id: str, secret_key: str, region: str):
938
928
  client = FeatureCloudSDK(secret_id=secret_id, secret_key=secret_key, region=region)
939
- request = DescribeNormalSchedulerExecutorGroupsRequest()
929
+ request = models.DescribeNormalSchedulerExecutorGroupsRequest()
940
930
  request.ProjectId = project_id
941
931
  resp = client.DescribeNormalSchedulerExecutorGroups(request)
942
932
  # 默认取第一个健康可用的资源组进行执行
@@ -950,7 +940,7 @@ def _refresh_table(project_id: str, secret_id: str, secret_key: str, region: str
950
940
  action: str, database_name: str, data_source_name: str, data_source_type: str,
951
941
  engine_name: str, is_try: bool):
952
942
  client = FeatureCloudSDK(secret_id=secret_id, secret_key=secret_key, region=region)
953
- request = RefreshFeatureTableRequest()
943
+ request = models.RefreshFeatureTableRequest()
954
944
  request.ProjectId = project_id
955
945
  request.TableName = table_name
956
946
  request.DatabaseName = database_name
@@ -1,7 +1,7 @@
1
1
  import logging
2
2
  import os
3
3
  from types import ModuleType
4
- from typing import Any, List, Optional, Set, Union, Dict
4
+ from typing import Any, List, Optional, Union, Dict
5
5
 
6
6
  import mlflow
7
7
  from mlflow.models import Model
@@ -9,18 +9,17 @@ from mlflow.utils.file_utils import TempDir, read_yaml
9
9
  from pyspark.sql import DataFrame
10
10
  from pyspark.sql.functions import struct
11
11
 
12
- from wedata.feature_store.constants import constants
13
- from wedata.feature_store.entities.feature_function import FeatureFunction
14
- from wedata.feature_store.entities.feature_lookup import FeatureLookup
15
- from wedata.feature_store.entities.feature_spec import FeatureSpec
16
- from wedata.feature_store.entities.feature_table import FeatureTable
17
- from wedata.feature_store.entities.training_set import TrainingSet
12
+ from wedata.common.constants import constants
13
+ from wedata.common.entities.feature_function import FeatureFunction
14
+ from wedata.common.entities.feature_lookup import FeatureLookup
15
+ from wedata.common.entities.feature_spec import FeatureSpec
16
+ from wedata.common.entities.training_set import TrainingSet
18
17
  from wedata.feature_store.mlflow_model import _FeatureStoreModelWrapper
19
- from wedata.feature_store.spark_client.spark_client import SparkClient
20
- from wedata.feature_store.utils import validation_utils
21
- from wedata.feature_store.entities.feature_table import FeatureTable
18
+ from wedata.common.spark_client import SparkClient
19
+ from wedata.common.utils import validation_utils, common_utils, training_set_utils
20
+ from wedata.common.entities.feature_table import FeatureTable
22
21
 
23
- from wedata.feature_store.constants.constants import (
22
+ from wedata.common.constants.constants import (
24
23
  _NO_RESULT_TYPE_PASSED,
25
24
  _USE_SPARK_NATIVE_JOIN,
26
25
  MODEL_DATA_PATH_ROOT,
@@ -28,8 +27,8 @@ from wedata.feature_store.constants.constants import (
28
27
  _PREBUILT_ENV_URI
29
28
  )
30
29
 
31
- from wedata.feature_store.utils import common_utils, training_set_utils, uc_utils
32
- from wedata.feature_store.utils.signature_utils import get_mlflow_signature_from_feature_spec, \
30
+ from wedata.common.utils import uc_utils
31
+ from wedata.common.utils.signature_utils import get_mlflow_signature_from_feature_spec, \
33
32
  drop_signature_inputs_and_invalid_params
34
33
 
35
34
  _logger = logging.getLogger(__name__)
@@ -360,6 +359,7 @@ class TrainingSetClient:
360
359
  env_manager: Optional[str] = None,
361
360
  local_uri: Optional[str] = None,
362
361
  params: Optional[dict[str, Any]] = None,
362
+ timestamp_key: str = None,
363
363
  **kwargs,
364
364
  ) -> DataFrame:
365
365
  # TODO:ML 待确定是否需要
@@ -388,16 +388,13 @@ class TrainingSetClient:
388
388
  "The provided DataFrame for scoring must have unique column names. Found duplicates {}.",
389
389
  )
390
390
  artifact_path = os.path.join("artifacts", MODEL_DATA_PATH_ROOT)
391
- # print(f"artifact_path: {artifact_path}")
392
391
  with (TempDir() as tmp_location):
393
392
  local_path = (
394
393
  local_uri
395
394
  if local_uri
396
395
  else common_utils.download_model_artifacts(model_uri, tmp_location.path())
397
396
  )
398
- # print(f"wedata local_path:{local_path}")
399
397
  model_data_path = os.path.join(local_path, artifact_path)
400
- # print(f"artifact_path: {artifact_path}")
401
398
 
402
399
  # Augment local workspace metastore tables from 2L to 3L,
403
400
  # this will prevent us from erroneously reading data from other catalogs
@@ -425,6 +422,7 @@ class TrainingSetClient:
425
422
  feature_input_keys = []
426
423
  for fci in feature_spec.feature_column_infos:
427
424
  feature_input_keys.extend([k for k in fci.lookup_key])
425
+
428
426
  on_demand_input_names = uc_utils.get_unique_list_order(
429
427
  [
430
428
  input_name
@@ -436,7 +434,6 @@ class TrainingSetClient:
436
434
  source_data_names = [
437
435
  sdci.name for sdci in feature_spec.source_data_column_infos
438
436
  ]
439
- # print(f"wedata source_data_names:{source_data_names}")
440
437
 
441
438
  feature_output_names = [
442
439
  fci.output_name for fci in feature_spec.feature_column_infos
wedata/tempo/interpol.py CHANGED
@@ -23,7 +23,7 @@ class Interpolation:
23
23
  """
24
24
  Validate if the fill provided is within the allowed list of values.
25
25
 
26
- :param fill: Fill type e.g. "zero", "null", "bfill", "ffill", "linear"
26
+ :param method: Fill type e.g. "zero", "null", "bfill", "ffill", "linear"
27
27
  """
28
28
  if method not in method_options:
29
29
  raise ValueError(
@@ -43,8 +43,8 @@ class Interpolation:
43
43
 
44
44
  :param df: DataFrame to be validated
45
45
  :param partition_cols: Partition columns to be validated
46
- :param target_col: Target column to be validated
47
46
  :param ts_col: Timestamp column to be validated
47
+ :param ts_col_dtype: Timestamp column type
48
48
  """
49
49
 
50
50
  if partition_cols is not None:
@@ -1,64 +0,0 @@
1
- wedata/__init__.py,sha256=GYxqkkgH0oH4QtNiOCZHuGkc0sSH1LgEqmhSX6sB4So,200
2
- wedata/feature_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- wedata/feature_store/client.py,sha256=rXQfSC14vvxPed9P0PoboDTwhD47aPwmGndY825Sl6k,19484
4
- wedata/feature_store/mlflow_model.py,sha256=OCUuccOoO0NXWSzIPoGeL03Ha1Q3aQTJW2RlJrTCmzc,554
5
- wedata/feature_store/cloud_sdk_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- wedata/feature_store/cloud_sdk_client/client.py,sha256=NtzV0EmLTzMLYqWhnUyOx3bmxoIIgxw8REid-Gy96Pc,4234
7
- wedata/feature_store/cloud_sdk_client/models.py,sha256=7_QUq0kZcrcclRMsIYFoqBrlzVwaHoVY-yU5SHIrJWM,19789
8
- wedata/feature_store/cloud_sdk_client/utils.py,sha256=6ESwVhlrftnp0h9ojTzbB-m-0hktLI1PLcyk6zpNgrs,857
9
- wedata/feature_store/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- wedata/feature_store/common/protos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- wedata/feature_store/common/protos/feature_store_pb2.py,sha256=oMIUGGeGNP84g_nFqOQwTXjV1GiU2ehSOy7CyFu2__g,4207
12
- wedata/feature_store/common/store_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- wedata/feature_store/common/store_config/redis.py,sha256=9R5npM2s1u0o9IakmpbRsFdJC0vNar_uvA62OLWuXBs,1145
14
- wedata/feature_store/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- wedata/feature_store/constants/constants.py,sha256=d4xnvPZeVKMFi8cYp6Ya9GdoUl9NQzyH4hIaXOgTWd4,1959
16
- wedata/feature_store/constants/engine_types.py,sha256=42mI-kNDDtoA4_I3iqDe4FkF2M2l_Bt4Q1V6WUB-_k0,921
17
- wedata/feature_store/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- wedata/feature_store/entities/column_info.py,sha256=-AR6EKHwgoqIkRHFyguxVEtnYt6fvusWHkEjF4kvS0A,5141
19
- wedata/feature_store/entities/environment_variables.py,sha256=ZEFml5H9MQuzBKM074mUrFYu-Sga4Knmxqiwpke2WGc,1679
20
- wedata/feature_store/entities/feature.py,sha256=wX8fTBlJq3GYdj9rrBDCY3kFgcVBBAiOOZdxEhnQkNQ,1241
21
- wedata/feature_store/entities/feature_column_info.py,sha256=ZAS_I-MDg2ofCv3nwYvGCQrrpEljzrh_L1D-gqOV_mM,2407
22
- wedata/feature_store/entities/feature_function.py,sha256=R17INrCE-U_Uj9KLbFz69aYlOkTETTwQHMMo470F4lQ,1865
23
- wedata/feature_store/entities/feature_lookup.py,sha256=UYmYCzkQ1_KuooybS3F-7HDcjBMPZ72InL06UTHbEtw,8749
24
- wedata/feature_store/entities/feature_spec.py,sha256=Z2SXE_LObjNY3q5yBVKPXGTUiMZy7zaI6-ZbAoFlwG8,21769
25
- wedata/feature_store/entities/feature_spec_constants.py,sha256=YWDBfRiNDe6fUJFUBo3V4WYg2xsljoPAE-ZejfFZCgM,785
26
- wedata/feature_store/entities/feature_table.py,sha256=nHCCd7WUryROt9oTJpYkT-KiGbKcQd7BEE9L2_1dhYw,4107
27
- wedata/feature_store/entities/feature_table_info.py,sha256=yJ1P3AYaPiHW6ehCbMWhndzguHJqJKWfeFwYjwTLt2U,1481
28
- wedata/feature_store/entities/function_info.py,sha256=yDwIzTrBR-ECWubgeoy48SYZfdY7P0JcraZnWGCW0ag,2752
29
- wedata/feature_store/entities/on_demand_column_info.py,sha256=a44ep-f3FOruWNXl3c8v7733rNuoKXJaHTv1aqF905s,1739
30
- wedata/feature_store/entities/source_data_column_info.py,sha256=FyBmBPUSvc2S2OPFTvsQf2AdS-KFGkYBmd4yL_Vur8M,702
31
- wedata/feature_store/entities/training_set.py,sha256=ylt1h6Z_xU8hKYvnvd80CeewTGSN68-_kvFpoliwH7s,5679
32
- wedata/feature_store/feast_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
- wedata/feature_store/feast_client/feast_client.py,sha256=TT1ESVYbP9kT5LyfYEYfnsM2mvOiJBug3527Y73lg3o,20694
34
- wedata/feature_store/feature_table_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
- wedata/feature_store/feature_table_client/feature_table_client.py,sha256=wLawB-97ZJ4og5Y8f0VfRYsuygXY6DzMeorTTzdA9mM,41176
36
- wedata/feature_store/spark_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
- wedata/feature_store/spark_client/spark_client.py,sha256=ACWaRaDChqr2qAM8st6e0BP-LSEBSIj2xZAElX2nxHE,11799
38
- wedata/feature_store/training_set_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
- wedata/feature_store/training_set_client/training_set_client.py,sha256=teaWM-xDgp2TwnadovUm0i4A26roTozgRefIZaHORko,23376
40
- wedata/feature_store/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
- wedata/feature_store/utils/common_utils.py,sha256=vkpoXxZTd6S-2MgdyTQZ6P_ckdqNSK50ECuVBG2BwfI,12314
42
- wedata/feature_store/utils/env_utils.py,sha256=ffDhQVW96vZxWurzOhYfZQk_dQ1LHC-y8tl4PQlp9Tg,2570
43
- wedata/feature_store/utils/feature_lookup_utils.py,sha256=mNV6RhBdpv1iTZduCA9YwXwkeJCwU5MFQ1MkFeD9IhY,22003
44
- wedata/feature_store/utils/feature_spec_utils.py,sha256=j8t-zel2_r8Q9m88BmFKkHMdkGNIduWJB-28OZDASRY,11613
45
- wedata/feature_store/utils/feature_utils.py,sha256=KKq28bVB_lCuhnR9Hk6JegJBOVgcelWlvrRM-F9onkA,2796
46
- wedata/feature_store/utils/on_demand_utils.py,sha256=pazZRG5c0Se08MV_inBddIeX4Q9xlVN_H9SC_WK3xzs,4260
47
- wedata/feature_store/utils/schema_utils.py,sha256=y6EYY1pUxjVg6MP4C7avdW8ZEBBaDo1YTV2CmPF4i8o,4491
48
- wedata/feature_store/utils/signature_utils.py,sha256=SZFufd19m0jmGnOLmAl3JPKZC-qHq-wQezh6G7HOMfc,7773
49
- wedata/feature_store/utils/topological_sort.py,sha256=ebzKxmxeCLk9seB1zR0ASCGXsZsa-DjxJeTc4KUadtg,6475
50
- wedata/feature_store/utils/training_set_utils.py,sha256=MYsPZS1d9HKswHgjgxD8K7H9N3dWPyyTTx20Mkp4PVU,22497
51
- wedata/feature_store/utils/uc_utils.py,sha256=5jngdLT8quP1lfGHN_SSFQQlcOh_sUB9M1varCgdFwg,11436
52
- wedata/feature_store/utils/validation_utils.py,sha256=lJe6HCg5v5CZxH_pvT-vpGhCpo66LT2erXraHE2T0iI,2584
53
- wedata/tempo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
- wedata/tempo/interpol.py,sha256=3JF8dwcdKv2o10FN45aefgvxR5DjlR6FJAXrbAiGCro,16423
55
- wedata/tempo/intervals.py,sha256=L2ao7LlgQmfDTFwnBoFLXeuygSvwtIKXL52thiD80Yw,44078
56
- wedata/tempo/io.py,sha256=KWIn6IBSkuBxr8QCcpdZ6NFX_49-8UQdGftmZgs_ujw,1872
57
- wedata/tempo/ml.py,sha256=WtGa2szn6PditvZsTZoxo7wFDe4k1SRoMZ-jgNGIjvE,4323
58
- wedata/tempo/resample.py,sha256=h81RVVmCl4ect-YKE-KZZHPDi1rGI3sh-YIb-Btz0ck,9698
59
- wedata/tempo/tsdf.py,sha256=S4lZfxhSRFiezYoYS6gvGsl1mZA3zp-MWEKFHYZpDg0,70968
60
- wedata/tempo/utils.py,sha256=I9I6l2DMwUoY213L04Yc1UR_zTWgSkj1BVo4ZwzQd4Y,7977
61
- tencent_wedata_feature_engineering_dev-0.1.42.dist-info/METADATA,sha256=92FGxlFyakPGdx3bwUjkpmImzCl5eB_KxiYMU2yqc_8,582
62
- tencent_wedata_feature_engineering_dev-0.1.42.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
63
- tencent_wedata_feature_engineering_dev-0.1.42.dist-info/top_level.txt,sha256=Xa0v1rh__RvfVTVDirW5r5UBKg7ZO_iuTeXfp8MNo2A,7
64
- tencent_wedata_feature_engineering_dev-0.1.42.dist-info/RECORD,,
File without changes
File without changes