tencent-wedata-feature-engineering-dev 0.1.48__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64):
  1. {tencent_wedata_feature_engineering_dev-0.1.48.dist-info → tencent_wedata_feature_engineering_dev-0.2.5.dist-info}/METADATA +14 -3
  2. tencent_wedata_feature_engineering_dev-0.2.5.dist-info/RECORD +78 -0
  3. {tencent_wedata_feature_engineering_dev-0.1.48.dist-info → tencent_wedata_feature_engineering_dev-0.2.5.dist-info}/WHEEL +1 -1
  4. wedata/__init__.py +1 -1
  5. wedata/common/base_table_client/__init__.py +1 -0
  6. wedata/common/base_table_client/base.py +58 -0
  7. wedata/common/cloud_sdk_client/__init__.py +2 -0
  8. wedata/{feature_store → common}/cloud_sdk_client/client.py +33 -3
  9. wedata/{feature_store → common}/cloud_sdk_client/models.py +212 -37
  10. wedata/{feature_store → common}/cloud_sdk_client/utils.py +7 -0
  11. wedata/{feature_store → common}/constants/constants.py +3 -2
  12. wedata/common/constants/engine_types.py +34 -0
  13. wedata/{feature_store → common}/entities/column_info.py +6 -5
  14. wedata/{feature_store → common}/entities/feature_column_info.py +2 -1
  15. wedata/{feature_store → common}/entities/feature_lookup.py +1 -1
  16. wedata/{feature_store → common}/entities/feature_spec.py +9 -9
  17. wedata/{feature_store → common}/entities/feature_table_info.py +1 -1
  18. wedata/{feature_store → common}/entities/function_info.py +2 -1
  19. wedata/{feature_store → common}/entities/on_demand_column_info.py +2 -1
  20. wedata/{feature_store → common}/entities/source_data_column_info.py +3 -1
  21. wedata/{feature_store → common}/entities/training_set.py +6 -6
  22. wedata/common/feast_client/__init__.py +1 -0
  23. wedata/{feature_store → common}/feast_client/feast_client.py +1 -1
  24. wedata/common/log/__init__.py +1 -0
  25. wedata/{feature_store/common → common}/log/logger.py +9 -5
  26. wedata/common/spark_client/__init__.py +1 -0
  27. wedata/{feature_store → common}/spark_client/spark_client.py +6 -7
  28. wedata/{feature_store → common}/utils/common_utils.py +7 -9
  29. wedata/{feature_store → common}/utils/env_utils.py +12 -0
  30. wedata/{feature_store → common}/utils/feature_lookup_utils.py +6 -6
  31. wedata/{feature_store → common}/utils/feature_spec_utils.py +13 -8
  32. wedata/{feature_store → common}/utils/feature_utils.py +5 -5
  33. wedata/{feature_store → common}/utils/on_demand_utils.py +5 -4
  34. wedata/{feature_store → common}/utils/schema_utils.py +1 -1
  35. wedata/{feature_store → common}/utils/signature_utils.py +4 -4
  36. wedata/{feature_store → common}/utils/training_set_utils.py +13 -13
  37. wedata/{feature_store → common}/utils/uc_utils.py +1 -1
  38. wedata/feature_engineering/__init__.py +1 -0
  39. wedata/feature_engineering/client.py +417 -0
  40. wedata/feature_engineering/ml_training_client/ml_training_client.py +569 -0
  41. wedata/feature_engineering/mlflow_model.py +9 -0
  42. wedata/feature_engineering/table_client/table_client.py +548 -0
  43. wedata/feature_store/client.py +11 -15
  44. wedata/feature_store/constants/engine_types.py +8 -30
  45. wedata/feature_store/feature_table_client/feature_table_client.py +73 -105
  46. wedata/feature_store/training_set_client/training_set_client.py +12 -23
  47. wedata/tempo/interpol.py +2 -2
  48. tencent_wedata_feature_engineering_dev-0.1.48.dist-info/RECORD +0 -66
  49. {tencent_wedata_feature_engineering_dev-0.1.48.dist-info → tencent_wedata_feature_engineering_dev-0.2.5.dist-info}/top_level.txt +0 -0
  50. /wedata/{feature_store/cloud_sdk_client → common}/__init__.py +0 -0
  51. /wedata/{feature_store/common/log → common/constants}/__init__.py +0 -0
  52. /wedata/{feature_store/common/protos → common/entities}/__init__.py +0 -0
  53. /wedata/{feature_store → common}/entities/environment_variables.py +0 -0
  54. /wedata/{feature_store → common}/entities/feature.py +0 -0
  55. /wedata/{feature_store → common}/entities/feature_function.py +0 -0
  56. /wedata/{feature_store → common}/entities/feature_spec_constants.py +0 -0
  57. /wedata/{feature_store → common}/entities/feature_table.py +0 -0
  58. /wedata/{feature_store/entities → common/protos}/__init__.py +0 -0
  59. /wedata/{feature_store/common → common}/protos/feature_store_pb2.py +0 -0
  60. /wedata/{feature_store/feast_client → common/utils}/__init__.py +0 -0
  61. /wedata/{feature_store → common}/utils/topological_sort.py +0 -0
  62. /wedata/{feature_store → common}/utils/validation_utils.py +0 -0
  63. /wedata/{feature_store/spark_client → feature_engineering/ml_training_client}/__init__.py +0 -0
  64. /wedata/{feature_store/utils → feature_engineering/table_client}/__init__.py +0 -0
@@ -2,7 +2,7 @@
2
2
  特征表操作相关工具方法
3
3
  """
4
4
  import json
5
- from typing import Union, List, Dict, Optional, Sequence, Any
5
+ from typing import Union, List, Dict, Optional, Any
6
6
 
7
7
  import tencentcloud.common.exception
8
8
  from pyspark.sql import DataFrame, SparkSession
@@ -10,23 +10,23 @@ from pyspark.sql.streaming import StreamingQuery
10
10
  from pyspark.sql.types import StructType
11
11
  import os
12
12
  import datetime
13
- from wedata.feature_store.constants.constants import (
13
+ from wedata.common.constants.constants import (
14
14
  APPEND, DEFAULT_WRITE_STREAM_TRIGGER, FEATURE_TABLE_KEY,
15
15
  FEATURE_TABLE_VALUE, FEATURE_TABLE_PROJECT, FEATURE_TABLE_TIMESTAMP,
16
16
  FEATURE_TABLE_BACKUP_PRIMARY_KEY, FEATURE_DLC_TABLE_PRIMARY_KEY)
17
- from wedata.feature_store.constants.engine_types import EngineTypes
17
+ from wedata.common.constants.engine_types import EngineTypes
18
+ from wedata.common.log import get_logger
18
19
  from wedata.feature_store.common.store_config.redis import RedisStoreConfig
19
- from wedata.feature_store.entities.feature_table import FeatureTable
20
- from wedata.feature_store.spark_client.spark_client import SparkClient
21
- from wedata.feature_store.utils import common_utils, env_utils
22
- from wedata.feature_store.feast_client.feast_client import FeastClient
23
- from wedata.feature_store.cloud_sdk_client.models import (
24
- TaskSchedulerConfiguration, OnlineFeatureConfiguration, OfflineFeatureConfiguration,
25
- CreateOnlineFeatureTableRequest, DescribeNormalSchedulerExecutorGroupsRequest, RefreshFeatureTableRequest)
26
- from wedata.feature_store.cloud_sdk_client.client import FeatureCloudSDK
20
+ from wedata.common.entities.feature_table import FeatureTable
21
+ from wedata.common.spark_client import SparkClient
22
+ from wedata.common.utils import common_utils, env_utils
23
+ from wedata.common.feast_client.feast_client import FeastClient
24
+ from wedata.common.cloud_sdk_client import models
25
+ from wedata.common.cloud_sdk_client import FeatureCloudSDK
26
+ from wedata.common.base_table_client import AbstractBaseTableClient
27
27
 
28
28
 
29
- class FeatureTableClient:
29
+ class FeatureTableClient(AbstractBaseTableClient):
30
30
  """特征表操作类"""
31
31
 
32
32
  def __init__(
@@ -44,6 +44,11 @@ class FeatureTableClient:
44
44
  self.__cloud_secret_id, self.__cloud_secret_key = env_utils.get_cloud_secret()
45
45
  self.__project = env_utils.get_project_id()
46
46
  self.__region = env_utils.get_region()
47
+ self.__logger = get_logger()
48
+ default_online_table = self._get_offline_default_database()
49
+ if default_online_table:
50
+ env_utils.set_default_database(default_online_table.DatabaseName)
51
+
47
52
 
48
53
  @property
49
54
  def cloud_secret_id(self) -> str:
@@ -77,56 +82,6 @@ class FeatureTableClient:
77
82
  def region(self) -> str:
78
83
  return self.__region
79
84
 
80
- @staticmethod
81
- def _normalize_params(
82
- param: Optional[Union[str, Sequence[str]]],
83
- default_type: type = list
84
- ) -> list:
85
- """统一处理参数标准化"""
86
- if param is None:
87
- return default_type()
88
- return list(param) if isinstance(param, Sequence) else [param]
89
-
90
- @staticmethod
91
- def _validate_schema(df: DataFrame, schema: StructType):
92
- """校验DataFrame和schema的有效性和一致性"""
93
- # 检查是否同时为空
94
- if df is None and schema is None:
95
- raise ValueError("Either DataFrame or schema must be provided")
96
-
97
- # 检查schema匹配
98
- if df is not None and schema is not None:
99
- df_schema = df.schema
100
- if df_schema != schema:
101
- diff_fields = set(df_schema.fieldNames()).symmetric_difference(set(schema.fieldNames()))
102
- raise ValueError(
103
- f"DataFrame schema does not match. Differences: "
104
- f"{diff_fields if diff_fields else 'field type mismatch'}"
105
- )
106
-
107
- @staticmethod
108
- def _validate_key_conflicts(primary_keys: List[str], timestamp_keys: str):
109
- """校验主键与时间戳键是否冲突"""
110
- if timestamp_keys in primary_keys:
111
- raise ValueError(f"Timestamp keys conflict with primary keys: {timestamp_keys}")
112
-
113
- @staticmethod
114
- def _validate_key_exists(primary_keys: List[str], timestamp_keys: str):
115
- """校验主键与时间戳键是否存在"""
116
- if not primary_keys:
117
- raise ValueError("Primary keys cannot be empty")
118
- if not timestamp_keys:
119
- raise ValueError("Timestamp keys cannot be empty")
120
-
121
- @staticmethod
122
- def _escape_sql_value(value: str) -> str:
123
- """转义SQL值中的特殊字符"""
124
- return value.replace("'", "''")
125
-
126
- @staticmethod
127
- def _check_sequence_element_type(sequence: Sequence[Any], element_type: type) -> bool:
128
- """检查序列中的元素是否为指定类型"""
129
- return all(isinstance(element, element_type) for element in sequence)
130
85
 
131
86
  def create_table(
132
87
  self,
@@ -310,7 +265,7 @@ class FeatureTableClient:
310
265
  raise ValueError(f"Engine type {engine_type} is not supported")
311
266
 
312
267
  # 打印sql
313
- print(f"create table ddl: {ddl}\n")
268
+ self.__logger.info(f"create table ddl: {ddl}\n")
314
269
 
315
270
  # 执行DDL
316
271
  try:
@@ -320,8 +275,6 @@ class FeatureTableClient:
320
275
  except Exception as e:
321
276
  raise ValueError(f"Failed to create table: {str(e)}") from e
322
277
 
323
- print("async table info to feast")
324
-
325
278
  self._feast_client.create_table(
326
279
  table_name=table_name,
327
280
  primary_keys=primary_keys,
@@ -332,7 +285,7 @@ class FeatureTableClient:
332
285
  description=description
333
286
  )
334
287
 
335
- print(f"create table {name} done")
288
+ self.__logger.info(f"Table '{name}' created successfully. Starting web synchronization.")
336
289
 
337
290
  try:
338
291
  self._sync_table_info(table_name=name, action_name="create",
@@ -340,7 +293,8 @@ class FeatureTableClient:
340
293
  data_source_name=data_source_name, engine_name=env_utils.get_engine_name(),
341
294
  is_try=False)
342
295
  except tencentcloud.common.exception.TencentCloudSDKException as e:
343
- raise RuntimeError(f"Table '{name}' is can't create. {str(e)}")
296
+ raise RuntimeError(f"Failed to synchronize web data for table '{name}'. "
297
+ f"Please manually operate on the web page. Error: {str(e)}")
344
298
 
345
299
  # 构建并返回FeatureTable对象
346
300
  return FeatureTable(
@@ -527,27 +481,28 @@ class FeatureTableClient:
527
481
 
528
482
  try:
529
483
  self._sync_table_info(table_name=name, action_name="create",
530
- database_name=env_utils.get_database_name(database_name),
531
- data_source_name=data_source_name, engine_name=env_utils.get_engine_name(), is_try=True)
484
+ database_name=env_utils.get_database_name(database_name),
485
+ data_source_name=data_source_name, engine_name=env_utils.get_engine_name(), is_try=True)
532
486
  except tencentcloud.common.exception.TencentCloudSDKException as e:
533
487
  raise RuntimeError(f"Table '{name}' is can't create. {str(e)}")
534
488
 
535
489
  # 执行修改
536
- print("alter table sql", alter_sql)
490
+ self.__logger.info(f"alter table sql: \n {alter_sql}")
537
491
  self._spark.sql(alter_sql)
538
- print("Execute sql done, start sync table info to feast")
492
+ self.__logger.debug("Execute sql done, start sync table info to feast")
539
493
  self._feast_client.alter_table(full_table_name=table_name, primary_keys=primary_keys,
540
494
  timestamp_key=timestamp_key)
541
- print(f"Successfully register table '{table_name}'")
495
+ self.__logger.info(f"Successfully register table '{table_name}'. Starting web synchronization.")
542
496
 
543
497
  try:
544
498
  self._sync_table_info(table_name=name, action_name="create",
545
- database_name=env_utils.get_database_name(database_name),
546
- data_source_name=data_source_name, engine_name=env_utils.get_engine_name(), is_try=False)
499
+ database_name=env_utils.get_database_name(database_name),
500
+ data_source_name=data_source_name, engine_name=env_utils.get_engine_name(), is_try=False)
547
501
  except tencentcloud.common.exception.TencentCloudSDKException as e:
548
- raise RuntimeError(f"sync table info failed. you need to sync table info manually. {str(e)}")
549
- except ValueError as e:
550
- raise # 直接抛出已知的ValueError
502
+ raise RuntimeError(f"Failed to synchronize web data for table '{name}'. "
503
+ f"Please manually operate on the web page. Error: {str(e)}")
504
+ except (ValueError, RuntimeError):
505
+ raise
551
506
  except Exception as e:
552
507
  raise RuntimeError(f"Failed to modify properties for table '{table_name}': {str(e)}") from e
553
508
 
@@ -624,33 +579,34 @@ class FeatureTableClient:
624
579
  try:
625
580
  # 检查表是否存在
626
581
  if not self._check_table_exists(table_name):
627
- print(f"Table '{name}' does not exist")
582
+ self.__logger.error(f"Table '{name}' does not exist")
628
583
  return
629
584
 
630
585
  try:
631
586
  feature_view = self._feast_client.get_feature_view(table_name)
632
587
  except Exception as e:
633
- print(f"Table '{name}' is not a feature table, skip delete. {str(e)}")
588
+ pass
589
+ # self.__logger.warning(f"Table '{name}' is not a feature table, skip delete. {str(e)}")
634
590
  else:
635
591
  if feature_view.online:
636
592
  raise ValueError(f"Table '{name}' has a online table, please call drop_online_table first")
637
593
  try:
638
594
  self._sync_table_info(table_name=name, action_name="delete",
639
- database_name=env_utils.get_database_name(database_name),
640
- data_source_name="", engine_name=env_utils.get_engine_name(), is_try=True)
595
+ database_name=env_utils.get_database_name(database_name),
596
+ data_source_name="", engine_name=env_utils.get_engine_name(), is_try=True)
641
597
  except tencentcloud.common.exception.TencentCloudSDKException as e:
642
598
  raise RuntimeError(f"Table '{name}' is can't delete. {str(e)}")
643
599
 
644
600
  # 执行删除
645
601
  self._spark.sql(f"DROP TABLE {table_name}")
646
- print(f"Table '{name}' dropped")
602
+ self.__logger.info(f"Table '{name}' dropped")
647
603
  try:
648
604
  self._feast_client.remove_offline_table(table_name=table_name)
649
605
  except Exception as e:
650
606
  raise
651
607
  # raise ValueError(f"Failed to delete table '{name}' in feast: {str(e)}")
652
608
  else:
653
- print(f"Table '{name}' removed from feast")
609
+ self.__logger.info(f"Table '{name}' removed from feast")
654
610
 
655
611
  try:
656
612
  self._sync_table_info(table_name=name, action_name="delete",
@@ -706,7 +662,7 @@ class FeatureTableClient:
706
662
  schema_name_list = [field.name for field in tmp_schema_list]
707
663
  schema = StructType(tmp_schema_list)
708
664
  for field in schema:
709
- print(f"{field.name} => {field.dataType}")
665
+ self.__logger.debug(f"translate {field.name} to feast Type: {field.dataType}")
710
666
 
711
667
  feast_client = FeastClient(offline_store=self._spark, online_store_config=online_config)
712
668
  # 构建离线表的entity的数据过滤
@@ -721,7 +677,7 @@ class FeatureTableClient:
721
677
  full_table_name=full_table_name,
722
678
  columns_name=columns_name_list,
723
679
  entity_rows=[result_row.asDict()])
724
- print("=====>read online dataframe:\n", online_view[schema_name_list])
680
+ self.__logger.debug(f"=====>read online dataframe:\n{online_view[schema_name_list]}")
725
681
  return self._spark.createDataFrame(online_view[schema_name_list], schema=schema, verifySchema=False)
726
682
  else:
727
683
  return self._spark.createDataFrame([])
@@ -730,7 +686,7 @@ class FeatureTableClient:
730
686
  full_table_name=full_table_name,
731
687
  columns_name=columns_name_list,
732
688
  entity_rows=entity_row)
733
- print("=====>read online dataframe:\n", online_view[schema_name_list])
689
+ self.__logger.debug(f"=====>read online dataframe:\n{online_view[schema_name_list]}")
734
690
  return self._spark.createDataFrame(online_view[schema_name_list], schema=schema, verifySchema=False)
735
691
 
736
692
  def get_table(
@@ -819,7 +775,9 @@ class FeatureTableClient:
819
775
 
820
776
  # 执行修改
821
777
  self._spark.sql(alter_sql)
822
- self._feast_client.modify_tags(table_name=table_name, tags=properties)
778
+ tbl_pro = self._spark.sql(f"SHOW TBLPROPERTIES {table_name}")
779
+ props = {row['key']: row['value'] for row in tbl_pro.collect()}
780
+ self._feast_client.modify_tags(table_name=table_name, tags=props)
823
781
  print(f"Successfully updated properties for table '{name}': {list(properties.keys())}")
824
782
 
825
783
  except ValueError as e:
@@ -827,9 +785,9 @@ class FeatureTableClient:
827
785
  except Exception as e:
828
786
  raise RuntimeError(f"Failed to modify properties for table '{name}': {str(e)}") from e
829
787
 
830
- def publish_table(self, table_name: str, data_source_name: str, cloud_secret_id: str, cloud_secret_key: str,
788
+ def publish_table(self, table_name: str, data_source_name: str,
831
789
  database_name: Optional[str] = None,
832
- is_cycle: bool = False, cycle_obj: TaskSchedulerConfiguration = None,
790
+ is_cycle: bool = False, cycle_obj: models.TaskSchedulerConfiguration = None,
833
791
  is_use_default_online: bool = True, online_config: RedisStoreConfig = None):
834
792
  """
835
793
  将离线特征表发布为在线特征表
@@ -852,46 +810,43 @@ class FeatureTableClient:
852
810
  # 检查是否已经发布,查看Redis中是否有值
853
811
  try:
854
812
  # 获取离线表的列名
855
- online_data = self._read_online_table(
856
- table_name=table_name,
857
- database_name=database_name,
858
- online_config=online_config)
813
+ online_data = self._feast_client.get_feature_view(full_table_name)
859
814
  except Exception as e:
860
815
  print(f"Failed to get online table view for table '{full_table_name}': {str(e)}")
861
816
  else:
862
- if online_data:
817
+ if online_data.online:
863
818
  raise ValueError(f"Table '{full_table_name}' has already been published")
864
819
 
865
820
  # 配置周期性参数
866
821
  if is_cycle:
867
- if not isinstance(cycle_obj, TaskSchedulerConfiguration):
822
+ if not isinstance(cycle_obj, models.TaskSchedulerConfiguration):
868
823
  raise ValueError("cycle_obj must be a TaskSchedulerConfiguration object when is_cycle is True")
869
824
 
870
825
  cycle_obj.CycleType = "CRONTAB_CYCLE"
871
826
  else:
872
- if isinstance(cycle_obj, TaskSchedulerConfiguration):
827
+ if isinstance(cycle_obj, models.TaskSchedulerConfiguration):
873
828
  cycle_obj.CycleType = "ONEOFF_CYCLE"
874
829
  else:
875
- cycle_obj = TaskSchedulerConfiguration()
830
+ cycle_obj = models.TaskSchedulerConfiguration()
876
831
  cycle_obj.CycleType = "ONEOFF_CYCLE"
877
832
  # 设置默认当前时间延后1分钟
878
833
  cycle_obj.CrontabExpression = (datetime.datetime.now() + datetime.timedelta(minutes=3)).strftime(
879
834
  "%M %H %d %m %w ? %y")
880
835
 
881
836
  if is_use_default_online:
882
- online_feature_config = OnlineFeatureConfiguration()
837
+ online_feature_config = models.OnlineFeatureConfiguration()
883
838
  online_feature_config.UserDefault = True
884
839
  else:
885
840
  if not isinstance(online_config, RedisStoreConfig):
886
841
  raise ValueError("online_config must be a RedisStoreConfig object when is_use_default_online is False")
887
842
 
888
- online_feature_config = OnlineFeatureConfiguration()
843
+ online_feature_config = models.OnlineFeatureConfiguration()
889
844
  online_feature_config.UserDefault = False
890
845
  online_feature_config.Host = online_config.host
891
846
  online_feature_config.Port = online_config.port
892
847
  online_feature_config.DB = online_config.db
893
848
 
894
- offline_feature_config = OfflineFeatureConfiguration()
849
+ offline_feature_config = models.OfflineFeatureConfiguration()
895
850
  offline_feature_config.DatabaseName = env_utils.get_database_name(database_name)
896
851
  offline_feature_config.TableName = table_name
897
852
 
@@ -902,7 +857,7 @@ class FeatureTableClient:
902
857
  offline_feature_config.DatasourceType = env_utils.get_engine_type()
903
858
  offline_feature_config.EngineName = env_utils.get_engine_name()
904
859
 
905
- api_requests = CreateOnlineFeatureTableRequest()
860
+ api_requests = models.CreateOnlineFeatureTableRequest()
906
861
  api_requests.OfflineFeatureConfiguration = offline_feature_config
907
862
  api_requests.OnlineFeatureConfiguration = online_feature_config
908
863
  api_requests.TaskSchedulerConfiguration = cycle_obj
@@ -910,11 +865,11 @@ class FeatureTableClient:
910
865
  region = env_utils.get_region()
911
866
  if not os.environ.get("RESOURCE_GROUP_ID", ""):
912
867
  res_group_item = _get_default_resource_group(
913
- api_requests.ProjectId, cloud_secret_id, cloud_secret_key, region)
868
+ api_requests.ProjectId, self.__cloud_secret_id, self.__cloud_secret_key, region)
914
869
  api_requests.ResourceGroupId = res_group_item.ExecutorGroupId
915
870
  else:
916
871
  api_requests.ResourceGroupId = os.environ.get("RESOURCE_GROUP_ID")
917
- client = FeatureCloudSDK(secret_id=cloud_secret_id, secret_key=cloud_secret_key, region=region)
872
+ client = FeatureCloudSDK(secret_id=self.__cloud_secret_id, secret_key=self.__cloud_secret_key, region=region)
918
873
  resp = client.CreateOnlineFeatureTable(api_requests)
919
874
  if cycle_obj.CycleType == "ONEOFF_CYCLE":
920
875
  print(f"publish online task create success. it will be execute after 3 min. {resp.Data.OnlineTableId} {resp.Data.OfflineTableId} ")
@@ -955,10 +910,23 @@ class FeatureTableClient:
955
910
  def _check_table_exists(self, full_table_name: str) -> bool:
956
911
  return common_utils.check_spark_table_exists(self._spark, full_table_name)
957
912
 
913
+ def _get_offline_default_database(self) -> Optional[models.FeatureStoreDatabase]:
914
+ client = FeatureCloudSDK(secret_id=self.__cloud_secret_id, secret_key=self.__cloud_secret_key,
915
+ region=self.__region)
916
+ req = models.DescribeFeatureStoreDatabasesRequest()
917
+ req.ProjectId = self.__project
918
+ rsp = client.DescribeFeatureStoreDatabases(req)
919
+ if len(rsp.Data) == 0:
920
+ return None
921
+ for item in rsp.Data:
922
+ if item.OnlineMode == 0 and item.IsDefault == 1:
923
+ return item
924
+ return None
925
+
958
926
 
959
927
  def _get_default_resource_group(project_id: str, secret_id: str, secret_key: str, region: str):
960
928
  client = FeatureCloudSDK(secret_id=secret_id, secret_key=secret_key, region=region)
961
- request = DescribeNormalSchedulerExecutorGroupsRequest()
929
+ request = models.DescribeNormalSchedulerExecutorGroupsRequest()
962
930
  request.ProjectId = project_id
963
931
  resp = client.DescribeNormalSchedulerExecutorGroups(request)
964
932
  # 默认取第一个健康可用的资源组进行执行
@@ -972,7 +940,7 @@ def _refresh_table(project_id: str, secret_id: str, secret_key: str, region: str
972
940
  action: str, database_name: str, data_source_name: str, data_source_type: str,
973
941
  engine_name: str, is_try: bool):
974
942
  client = FeatureCloudSDK(secret_id=secret_id, secret_key=secret_key, region=region)
975
- request = RefreshFeatureTableRequest()
943
+ request = models.RefreshFeatureTableRequest()
976
944
  request.ProjectId = project_id
977
945
  request.TableName = table_name
978
946
  request.DatabaseName = database_name
@@ -1,7 +1,7 @@
1
1
  import logging
2
2
  import os
3
3
  from types import ModuleType
4
- from typing import Any, List, Optional, Set, Union, Dict
4
+ from typing import Any, List, Optional, Union, Dict
5
5
 
6
6
  import mlflow
7
7
  from mlflow.models import Model
@@ -9,18 +9,17 @@ from mlflow.utils.file_utils import TempDir, read_yaml
9
9
  from pyspark.sql import DataFrame
10
10
  from pyspark.sql.functions import struct
11
11
 
12
- from wedata.feature_store.constants import constants
13
- from wedata.feature_store.entities.feature_function import FeatureFunction
14
- from wedata.feature_store.entities.feature_lookup import FeatureLookup
15
- from wedata.feature_store.entities.feature_spec import FeatureSpec
16
- from wedata.feature_store.entities.feature_table import FeatureTable
17
- from wedata.feature_store.entities.training_set import TrainingSet
12
+ from wedata.common.constants import constants
13
+ from wedata.common.entities.feature_function import FeatureFunction
14
+ from wedata.common.entities.feature_lookup import FeatureLookup
15
+ from wedata.common.entities.feature_spec import FeatureSpec
16
+ from wedata.common.entities.training_set import TrainingSet
18
17
  from wedata.feature_store.mlflow_model import _FeatureStoreModelWrapper
19
- from wedata.feature_store.spark_client.spark_client import SparkClient
20
- from wedata.feature_store.utils import validation_utils
21
- from wedata.feature_store.entities.feature_table import FeatureTable
18
+ from wedata.common.spark_client import SparkClient
19
+ from wedata.common.utils import validation_utils, common_utils, training_set_utils
20
+ from wedata.common.entities.feature_table import FeatureTable
22
21
 
23
- from wedata.feature_store.constants.constants import (
22
+ from wedata.common.constants.constants import (
24
23
  _NO_RESULT_TYPE_PASSED,
25
24
  _USE_SPARK_NATIVE_JOIN,
26
25
  MODEL_DATA_PATH_ROOT,
@@ -28,8 +27,8 @@ from wedata.feature_store.constants.constants import (
28
27
  _PREBUILT_ENV_URI
29
28
  )
30
29
 
31
- from wedata.feature_store.utils import common_utils, training_set_utils, uc_utils
32
- from wedata.feature_store.utils.signature_utils import get_mlflow_signature_from_feature_spec, \
30
+ from wedata.common.utils import uc_utils
31
+ from wedata.common.utils.signature_utils import get_mlflow_signature_from_feature_spec, \
33
32
  drop_signature_inputs_and_invalid_params
34
33
 
35
34
  _logger = logging.getLogger(__name__)
@@ -389,16 +388,13 @@ class TrainingSetClient:
389
388
  "The provided DataFrame for scoring must have unique column names. Found duplicates {}.",
390
389
  )
391
390
  artifact_path = os.path.join("artifacts", MODEL_DATA_PATH_ROOT)
392
- # print(f"artifact_path: {artifact_path}")
393
391
  with (TempDir() as tmp_location):
394
392
  local_path = (
395
393
  local_uri
396
394
  if local_uri
397
395
  else common_utils.download_model_artifacts(model_uri, tmp_location.path())
398
396
  )
399
- # print(f"wedata local_path:{local_path}")
400
397
  model_data_path = os.path.join(local_path, artifact_path)
401
- # print(f"artifact_path: {artifact_path}")
402
398
 
403
399
  # Augment local workspace metastore tables from 2L to 3L,
404
400
  # this will prevent us from erroneously reading data from other catalogs
@@ -424,9 +420,7 @@ class TrainingSetClient:
424
420
 
425
421
  # Validate that columns needed for joining feature tables exist and are not duplicates.
426
422
  feature_input_keys = []
427
- print("====>timestamp_key:", timestamp_key)
428
423
  for fci in feature_spec.feature_column_infos:
429
- print("====>fci:", fci.lookup_key)
430
424
  feature_input_keys.extend([k for k in fci.lookup_key])
431
425
 
432
426
  on_demand_input_names = uc_utils.get_unique_list_order(
@@ -440,18 +434,13 @@ class TrainingSetClient:
440
434
  source_data_names = [
441
435
  sdci.name for sdci in feature_spec.source_data_column_infos
442
436
  ]
443
- # print(f"wedata source_data_names:{source_data_names}")
444
-
445
- print("===>source_data_names:", source_data_names)
446
437
 
447
438
  feature_output_names = [
448
439
  fci.output_name for fci in feature_spec.feature_column_infos
449
440
  ]
450
- print("====>feature_output_names:", feature_output_names)
451
441
  on_demand_output_names = [
452
442
  odci.output_name for odci in feature_spec.on_demand_column_infos
453
443
  ]
454
- print("====>on_demand_output_names:", on_demand_output_names)
455
444
  all_output_names = set(
456
445
  source_data_names + feature_output_names + on_demand_output_names
457
446
  )
wedata/tempo/interpol.py CHANGED
@@ -23,7 +23,7 @@ class Interpolation:
23
23
  """
24
24
  Validate if the fill provided is within the allowed list of values.
25
25
 
26
- :param fill: Fill type e.g. "zero", "null", "bfill", "ffill", "linear"
26
+ :param method: Fill type e.g. "zero", "null", "bfill", "ffill", "linear"
27
27
  """
28
28
  if method not in method_options:
29
29
  raise ValueError(
@@ -43,8 +43,8 @@ class Interpolation:
43
43
 
44
44
  :param df: DataFrame to be validated
45
45
  :param partition_cols: Partition columns to be validated
46
- :param target_col: Target column to be validated
47
46
  :param ts_col: Timestamp column to be validated
47
+ :param ts_col_dtype: Timestamp column type
48
48
  """
49
49
 
50
50
  if partition_cols is not None:
@@ -1,66 +0,0 @@
1
- wedata/__init__.py,sha256=GYxqkkgH0oH4QtNiOCZHuGkc0sSH1LgEqmhSX6sB4So,200
2
- wedata/feature_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- wedata/feature_store/client.py,sha256=B6fy-PGJZsEHGPE8tDmrTolyioJ_-Po2DK3p_HCW7Sw,19552
4
- wedata/feature_store/mlflow_model.py,sha256=OCUuccOoO0NXWSzIPoGeL03Ha1Q3aQTJW2RlJrTCmzc,554
5
- wedata/feature_store/cloud_sdk_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- wedata/feature_store/cloud_sdk_client/client.py,sha256=B7nCQ_MvbLP4ieT7rsa32FHws2fOG4VQZT5lmQ3Cvzk,4914
7
- wedata/feature_store/cloud_sdk_client/models.py,sha256=7_QUq0kZcrcclRMsIYFoqBrlzVwaHoVY-yU5SHIrJWM,19789
8
- wedata/feature_store/cloud_sdk_client/utils.py,sha256=xwvXJpk2RXbJtgOaXCZQbGRrlzcTRzv27yQFxKp_X84,970
9
- wedata/feature_store/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- wedata/feature_store/common/log/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- wedata/feature_store/common/log/logger.py,sha256=c45DlIbIuwRP2na3ZXsncpHV5KUltqfyKzIgG9GG3g4,1151
12
- wedata/feature_store/common/protos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- wedata/feature_store/common/protos/feature_store_pb2.py,sha256=oMIUGGeGNP84g_nFqOQwTXjV1GiU2ehSOy7CyFu2__g,4207
14
- wedata/feature_store/common/store_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- wedata/feature_store/common/store_config/redis.py,sha256=9R5npM2s1u0o9IakmpbRsFdJC0vNar_uvA62OLWuXBs,1145
16
- wedata/feature_store/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- wedata/feature_store/constants/constants.py,sha256=M0UKZSnpM7x5RYDvfesyF422zg82JJe4JsHUuUDiUF4,1959
18
- wedata/feature_store/constants/engine_types.py,sha256=42mI-kNDDtoA4_I3iqDe4FkF2M2l_Bt4Q1V6WUB-_k0,921
19
- wedata/feature_store/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
- wedata/feature_store/entities/column_info.py,sha256=-AR6EKHwgoqIkRHFyguxVEtnYt6fvusWHkEjF4kvS0A,5141
21
- wedata/feature_store/entities/environment_variables.py,sha256=ZEFml5H9MQuzBKM074mUrFYu-Sga4Knmxqiwpke2WGc,1679
22
- wedata/feature_store/entities/feature.py,sha256=wX8fTBlJq3GYdj9rrBDCY3kFgcVBBAiOOZdxEhnQkNQ,1241
23
- wedata/feature_store/entities/feature_column_info.py,sha256=ZAS_I-MDg2ofCv3nwYvGCQrrpEljzrh_L1D-gqOV_mM,2407
24
- wedata/feature_store/entities/feature_function.py,sha256=R17INrCE-U_Uj9KLbFz69aYlOkTETTwQHMMo470F4lQ,1865
25
- wedata/feature_store/entities/feature_lookup.py,sha256=UYmYCzkQ1_KuooybS3F-7HDcjBMPZ72InL06UTHbEtw,8749
26
- wedata/feature_store/entities/feature_spec.py,sha256=Z2SXE_LObjNY3q5yBVKPXGTUiMZy7zaI6-ZbAoFlwG8,21769
27
- wedata/feature_store/entities/feature_spec_constants.py,sha256=YWDBfRiNDe6fUJFUBo3V4WYg2xsljoPAE-ZejfFZCgM,785
28
- wedata/feature_store/entities/feature_table.py,sha256=nHCCd7WUryROt9oTJpYkT-KiGbKcQd7BEE9L2_1dhYw,4107
29
- wedata/feature_store/entities/feature_table_info.py,sha256=yJ1P3AYaPiHW6ehCbMWhndzguHJqJKWfeFwYjwTLt2U,1481
30
- wedata/feature_store/entities/function_info.py,sha256=yDwIzTrBR-ECWubgeoy48SYZfdY7P0JcraZnWGCW0ag,2752
31
- wedata/feature_store/entities/on_demand_column_info.py,sha256=a44ep-f3FOruWNXl3c8v7733rNuoKXJaHTv1aqF905s,1739
32
- wedata/feature_store/entities/source_data_column_info.py,sha256=FyBmBPUSvc2S2OPFTvsQf2AdS-KFGkYBmd4yL_Vur8M,702
33
- wedata/feature_store/entities/training_set.py,sha256=ylt1h6Z_xU8hKYvnvd80CeewTGSN68-_kvFpoliwH7s,5679
34
- wedata/feature_store/feast_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
- wedata/feature_store/feast_client/feast_client.py,sha256=mCv-OiKehfgcOJhJV0wXMRs5d7e2zEBYmVmDguk0rxU,20728
36
- wedata/feature_store/feature_table_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
- wedata/feature_store/feature_table_client/feature_table_client.py,sha256=qyr-jS-nIyphFVZXcUXV_HSfAu-8c19f0b8iG5rYsl8,42669
38
- wedata/feature_store/spark_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
- wedata/feature_store/spark_client/spark_client.py,sha256=aTenEqfZoJYMrph98qjNHZ-H4dgNKnMaH14st8bCVRQ,11797
40
- wedata/feature_store/training_set_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
- wedata/feature_store/training_set_client/training_set_client.py,sha256=Ja_W1SKWKueW6wmwDx-623mfpwKQICm6A-ec_jgOFt4,23707
42
- wedata/feature_store/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
- wedata/feature_store/utils/common_utils.py,sha256=vkpoXxZTd6S-2MgdyTQZ6P_ckdqNSK50ECuVBG2BwfI,12314
44
- wedata/feature_store/utils/env_utils.py,sha256=kMCl6gyqDxjN5IZ7-wZMR0R4YY-Evh_7NHxugSYPWQc,2736
45
- wedata/feature_store/utils/feature_lookup_utils.py,sha256=mNV6RhBdpv1iTZduCA9YwXwkeJCwU5MFQ1MkFeD9IhY,22003
46
- wedata/feature_store/utils/feature_spec_utils.py,sha256=j8t-zel2_r8Q9m88BmFKkHMdkGNIduWJB-28OZDASRY,11613
47
- wedata/feature_store/utils/feature_utils.py,sha256=KKq28bVB_lCuhnR9Hk6JegJBOVgcelWlvrRM-F9onkA,2796
48
- wedata/feature_store/utils/on_demand_utils.py,sha256=pazZRG5c0Se08MV_inBddIeX4Q9xlVN_H9SC_WK3xzs,4260
49
- wedata/feature_store/utils/schema_utils.py,sha256=y6EYY1pUxjVg6MP4C7avdW8ZEBBaDo1YTV2CmPF4i8o,4491
50
- wedata/feature_store/utils/signature_utils.py,sha256=SZFufd19m0jmGnOLmAl3JPKZC-qHq-wQezh6G7HOMfc,7773
51
- wedata/feature_store/utils/topological_sort.py,sha256=ebzKxmxeCLk9seB1zR0ASCGXsZsa-DjxJeTc4KUadtg,6475
52
- wedata/feature_store/utils/training_set_utils.py,sha256=MYsPZS1d9HKswHgjgxD8K7H9N3dWPyyTTx20Mkp4PVU,22497
53
- wedata/feature_store/utils/uc_utils.py,sha256=5jngdLT8quP1lfGHN_SSFQQlcOh_sUB9M1varCgdFwg,11436
54
- wedata/feature_store/utils/validation_utils.py,sha256=lJe6HCg5v5CZxH_pvT-vpGhCpo66LT2erXraHE2T0iI,2584
55
- wedata/tempo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
- wedata/tempo/interpol.py,sha256=3JF8dwcdKv2o10FN45aefgvxR5DjlR6FJAXrbAiGCro,16423
57
- wedata/tempo/intervals.py,sha256=L2ao7LlgQmfDTFwnBoFLXeuygSvwtIKXL52thiD80Yw,44078
58
- wedata/tempo/io.py,sha256=KWIn6IBSkuBxr8QCcpdZ6NFX_49-8UQdGftmZgs_ujw,1872
59
- wedata/tempo/ml.py,sha256=WtGa2szn6PditvZsTZoxo7wFDe4k1SRoMZ-jgNGIjvE,4323
60
- wedata/tempo/resample.py,sha256=h81RVVmCl4ect-YKE-KZZHPDi1rGI3sh-YIb-Btz0ck,9698
61
- wedata/tempo/tsdf.py,sha256=S4lZfxhSRFiezYoYS6gvGsl1mZA3zp-MWEKFHYZpDg0,70968
62
- wedata/tempo/utils.py,sha256=I9I6l2DMwUoY213L04Yc1UR_zTWgSkj1BVo4ZwzQd4Y,7977
63
- tencent_wedata_feature_engineering_dev-0.1.48.dist-info/METADATA,sha256=-A-1H2urk9u4M9sEoQYri1xCeVtH3lFhUrsajCGb9tU,582
64
- tencent_wedata_feature_engineering_dev-0.1.48.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
65
- tencent_wedata_feature_engineering_dev-0.1.48.dist-info/top_level.txt,sha256=Xa0v1rh__RvfVTVDirW5r5UBKg7ZO_iuTeXfp8MNo2A,7
66
- tencent_wedata_feature_engineering_dev-0.1.48.dist-info/RECORD,,
File without changes