tencent-wedata-feature-engineering-dev 0.1.42__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. {tencent_wedata_feature_engineering_dev-0.1.42.dist-info → tencent_wedata_feature_engineering_dev-0.2.5.dist-info}/METADATA +14 -3
  2. tencent_wedata_feature_engineering_dev-0.2.5.dist-info/RECORD +78 -0
  3. {tencent_wedata_feature_engineering_dev-0.1.42.dist-info → tencent_wedata_feature_engineering_dev-0.2.5.dist-info}/WHEEL +1 -1
  4. wedata/__init__.py +1 -1
  5. wedata/common/base_table_client/__init__.py +1 -0
  6. wedata/common/base_table_client/base.py +58 -0
  7. wedata/common/cloud_sdk_client/__init__.py +2 -0
  8. wedata/{feature_store → common}/cloud_sdk_client/client.py +56 -12
  9. wedata/{feature_store → common}/cloud_sdk_client/models.py +212 -37
  10. wedata/{feature_store → common}/cloud_sdk_client/utils.py +14 -0
  11. wedata/{feature_store → common}/constants/constants.py +3 -2
  12. wedata/common/constants/engine_types.py +34 -0
  13. wedata/{feature_store → common}/entities/column_info.py +6 -5
  14. wedata/{feature_store → common}/entities/feature_column_info.py +2 -1
  15. wedata/{feature_store → common}/entities/feature_lookup.py +1 -1
  16. wedata/{feature_store → common}/entities/feature_spec.py +9 -9
  17. wedata/{feature_store → common}/entities/feature_table_info.py +1 -1
  18. wedata/{feature_store → common}/entities/function_info.py +2 -1
  19. wedata/{feature_store → common}/entities/on_demand_column_info.py +2 -1
  20. wedata/{feature_store → common}/entities/source_data_column_info.py +3 -1
  21. wedata/{feature_store → common}/entities/training_set.py +6 -6
  22. wedata/common/feast_client/__init__.py +1 -0
  23. wedata/{feature_store → common}/feast_client/feast_client.py +3 -4
  24. wedata/common/log/__init__.py +1 -0
  25. wedata/common/log/logger.py +44 -0
  26. wedata/common/spark_client/__init__.py +1 -0
  27. wedata/{feature_store → common}/spark_client/spark_client.py +6 -9
  28. wedata/{feature_store → common}/utils/common_utils.py +7 -9
  29. wedata/{feature_store → common}/utils/env_utils.py +31 -10
  30. wedata/{feature_store → common}/utils/feature_lookup_utils.py +6 -6
  31. wedata/{feature_store → common}/utils/feature_spec_utils.py +13 -8
  32. wedata/{feature_store → common}/utils/feature_utils.py +5 -5
  33. wedata/{feature_store → common}/utils/on_demand_utils.py +5 -4
  34. wedata/{feature_store → common}/utils/schema_utils.py +1 -1
  35. wedata/{feature_store → common}/utils/signature_utils.py +4 -4
  36. wedata/{feature_store → common}/utils/training_set_utils.py +13 -13
  37. wedata/{feature_store → common}/utils/uc_utils.py +1 -1
  38. wedata/feature_engineering/__init__.py +1 -0
  39. wedata/feature_engineering/client.py +417 -0
  40. wedata/feature_engineering/ml_training_client/ml_training_client.py +569 -0
  41. wedata/feature_engineering/mlflow_model.py +9 -0
  42. wedata/feature_engineering/table_client/__init__.py +0 -0
  43. wedata/feature_engineering/table_client/table_client.py +548 -0
  44. wedata/feature_store/client.py +13 -16
  45. wedata/feature_store/constants/engine_types.py +8 -30
  46. wedata/feature_store/feature_table_client/feature_table_client.py +98 -108
  47. wedata/feature_store/training_set_client/training_set_client.py +14 -17
  48. wedata/tempo/interpol.py +2 -2
  49. tencent_wedata_feature_engineering_dev-0.1.42.dist-info/RECORD +0 -64
  50. {tencent_wedata_feature_engineering_dev-0.1.42.dist-info → tencent_wedata_feature_engineering_dev-0.2.5.dist-info}/top_level.txt +0 -0
  51. /wedata/{feature_store/cloud_sdk_client → common}/__init__.py +0 -0
  52. /wedata/{feature_store/common/protos → common/constants}/__init__.py +0 -0
  53. /wedata/{feature_store → common}/entities/__init__.py +0 -0
  54. /wedata/{feature_store → common}/entities/environment_variables.py +0 -0
  55. /wedata/{feature_store → common}/entities/feature.py +0 -0
  56. /wedata/{feature_store → common}/entities/feature_function.py +0 -0
  57. /wedata/{feature_store → common}/entities/feature_spec_constants.py +0 -0
  58. /wedata/{feature_store → common}/entities/feature_table.py +0 -0
  59. /wedata/{feature_store/feast_client → common/protos}/__init__.py +0 -0
  60. /wedata/{feature_store/common → common}/protos/feature_store_pb2.py +0 -0
  61. /wedata/{feature_store/spark_client → common/utils}/__init__.py +0 -0
  62. /wedata/{feature_store → common}/utils/topological_sort.py +0 -0
  63. /wedata/{feature_store → common}/utils/validation_utils.py +0 -0
  64. /wedata/{feature_store/utils → feature_engineering/ml_training_client}/__init__.py +0 -0
@@ -40,10 +40,10 @@ RAW_MODEL_FOLDER = "raw_model"
40
40
  ML_MODEL = "MLmodel"
41
41
 
42
42
  # 特征查找客户端的PyPI包名
43
- FEATURE_LOOKUP_CLIENT_PIP_PACKAGE = "tencent-wedata-feature-engineering-dev"
43
+ FEATURE_LOOKUP_CLIENT_PIP_PACKAGE = "tencent-wedata-feature-engineering"
44
44
 
45
45
  # 特征查找版本号
46
- FEATURE_LOOKUP_CLIENT_MAJOR_VERSION = "0.1.42"
46
+ FEATURE_LOOKUP_CLIENT_MAJOR_VERSION = "0.2.5"
47
47
 
48
48
  # 特征存储内部数据目录
49
49
  FEATURE_STORE_INTERNAL_DATA_DIR = "_wedata_internal/"
@@ -56,4 +56,5 @@ FEATURE_TABLE_VALUE = "true"
56
56
  FEATURE_TABLE_PROJECT = "wedata.feature_project_id"
57
57
  FEATURE_TABLE_TIMESTAMP = "timestampKeys"
58
58
  FEATURE_TABLE_BACKUP_PRIMARY_KEY = "primaryKeys" # 备用标识,主键
59
+ FEATURE_ENGINEERING_TABLE_PRIMARY_KEY_WEDATA = "primary-key" # 用于Wedata3
59
60
  FEATURE_DLC_TABLE_PRIMARY_KEY = "dlc.ao.data.govern.sorted.keys"
@@ -0,0 +1,34 @@
1
+ from enum import Enum
2
+ import os
3
+
4
+
5
+ class EngineTypes(Enum):
6
+ HIVE_ENGINE = "hive"
7
+ ICEBERG_ENGINE = "iceberg"
8
+
9
+ @classmethod
10
+ def get_engine(cls, engine_name: str) -> 'EngineTypes':
11
+ try:
12
+ return cls(engine_name.lower())
13
+ except ValueError:
14
+ raise ValueError(f"Invalid engine type: {engine_name}. Supported engine types: {list(cls)}")
15
+
16
+
17
+ class CalculateEngineTypes(Enum):
18
+ DLC = "dlc"
19
+ EMR = "emr"
20
+
21
+ @classmethod
22
+ def get_calculate_engine(cls, engine_name: str) -> 'CalculateEngineTypes':
23
+ try:
24
+ return cls(engine_name.lower())
25
+ except ValueError:
26
+ raise ValueError(f"Invalid engine type: {engine_name}. Supported engine types: {list(cls)}")
27
+
28
+
29
+ def judge_engine_type() -> 'CalculateEngineTypes':
30
+ if os.environ.get("DLC_REGION", ""):
31
+ return CalculateEngineTypes.DLC
32
+ else:
33
+ return CalculateEngineTypes.EMR
34
+
@@ -1,13 +1,14 @@
1
1
  import copy
2
2
  from typing import Optional, Union
3
3
 
4
- from wedata.feature_store.entities.feature_column_info import FeatureColumnInfo
5
- from wedata.feature_store.entities.feature_spec_constants import SOURCE_DATA_COLUMN_INFO, FEATURE_COLUMN_INFO, \
4
+ from wedata.common.entities.feature_column_info import FeatureColumnInfo
5
+ from wedata.common.entities.feature_spec_constants import SOURCE_DATA_COLUMN_INFO, FEATURE_COLUMN_INFO, \
6
6
  ON_DEMAND_COLUMN_INFO
7
- from wedata.feature_store.entities.on_demand_column_info import OnDemandColumnInfo
8
- from wedata.feature_store.entities.source_data_column_info import SourceDataColumnInfo
7
+ from wedata.common.entities.on_demand_column_info import OnDemandColumnInfo
8
+ from wedata.common.entities.source_data_column_info import SourceDataColumnInfo
9
+
10
+ from wedata.common.protos import feature_store_pb2
9
11
 
10
- from wedata.feature_store.common.protos import feature_store_pb2
11
12
 
12
13
  class ColumnInfo:
13
14
  """
@@ -1,6 +1,7 @@
1
1
  from typing import List, Optional
2
2
 
3
- from wedata.feature_store.common.protos import feature_store_pb2
3
+ from wedata.common.protos import feature_store_pb2
4
+
4
5
 
5
6
  class FeatureColumnInfo:
6
7
  def __init__(
@@ -3,7 +3,7 @@ import datetime
3
3
  import logging
4
4
  from typing import Dict, List, Optional, Union
5
5
 
6
- from wedata.feature_store.utils import common_utils
6
+ from wedata.common.utils import common_utils
7
7
  from wedata.feature_store.common.store_config.redis import RedisStoreConfig
8
8
 
9
9
  _logger = logging.getLogger(__name__)
@@ -7,11 +7,11 @@ import mlflow
7
7
  from google.protobuf.json_format import MessageToDict, ParseDict
8
8
  from mlflow.utils.file_utils import TempDir, read_yaml, write_yaml
9
9
 
10
- from wedata.feature_store.common.protos import feature_store_pb2
11
- from wedata.feature_store.entities.column_info import ColumnInfo
12
- from wedata.feature_store.entities.feature_column_info import FeatureColumnInfo
13
- from wedata.feature_store.entities.function_info import FunctionInfo
14
- from wedata.feature_store.entities.feature_spec_constants import (
10
+ from wedata.common.protos import feature_store_pb2
11
+ from wedata.common.entities.column_info import ColumnInfo
12
+ from wedata.common.entities.feature_column_info import FeatureColumnInfo
13
+ from wedata.common.entities.function_info import FunctionInfo
14
+ from wedata.common.entities.feature_spec_constants import (
15
15
  BOUND_TO,
16
16
  DATA_TYPE,
17
17
  FEATURE_COLUMN_INFO,
@@ -34,10 +34,10 @@ from wedata.feature_store.entities.feature_spec_constants import (
34
34
  TRAINING_DATA,
35
35
  UDF_NAME,
36
36
  )
37
- from wedata.feature_store.entities.feature_table_info import FeatureTableInfo
38
- from wedata.feature_store.entities.on_demand_column_info import OnDemandColumnInfo
39
- from wedata.feature_store.entities.source_data_column_info import SourceDataColumnInfo
40
- from wedata.feature_store.utils import common_utils
37
+ from wedata.common.entities.feature_table_info import FeatureTableInfo
38
+ from wedata.common.entities.on_demand_column_info import OnDemandColumnInfo
39
+ from wedata.common.entities.source_data_column_info import SourceDataColumnInfo
40
+ from wedata.common.utils import common_utils
41
41
 
42
42
  # Change log for serialization version. Please update for each serialization version.
43
43
  # 1. Initial.
@@ -1,7 +1,7 @@
1
1
  from typing import Optional
2
2
 
3
+ from wedata.common.protos import feature_store_pb2
3
4
 
4
- from wedata.feature_store.common.protos import feature_store_pb2
5
5
 
6
6
  class FeatureTableInfo:
7
7
  def __init__(
@@ -1,7 +1,8 @@
1
1
 
2
2
  from typing import List, Optional
3
3
 
4
- from wedata.feature_store.common.protos import feature_store_pb2
4
+ from wedata.common.protos import feature_store_pb2
5
+
5
6
 
6
7
  class FunctionParameterInfo():
7
8
  def __init__(self, name: str, type_text: str):
@@ -1,6 +1,7 @@
1
1
  from typing import Dict
2
2
 
3
- from wedata.feature_store.common.protos import feature_store_pb2
3
+ from wedata.common.protos import feature_store_pb2
4
+
4
5
 
5
6
  class OnDemandColumnInfo:
6
7
  def __init__(
@@ -1,4 +1,6 @@
1
- from wedata.feature_store.common.protos import feature_store_pb2
1
+ from wedata.common.protos import feature_store_pb2
2
+
3
+
2
4
  class SourceDataColumnInfo:
3
5
  def __init__(self, name: str):
4
6
  if not name:
@@ -2,20 +2,20 @@ from typing import Dict, List, Optional
2
2
 
3
3
  from pyspark.sql import DataFrame
4
4
 
5
- from wedata.feature_store.entities.feature_table import FeatureTable
6
- from wedata.feature_store.entities.function_info import FunctionInfo
7
- from wedata.feature_store.utils.feature_lookup_utils import (
5
+ from wedata.common.entities.feature_table import FeatureTable
6
+ from wedata.common.entities.function_info import FunctionInfo
7
+ from wedata.common.utils.feature_lookup_utils import (
8
8
  join_feature_data_if_not_overridden,
9
9
  )
10
10
 
11
- from wedata.feature_store.entities.feature_spec import FeatureSpec
12
- from wedata.feature_store.utils.feature_spec_utils import (
11
+ from wedata.common.entities.feature_spec import FeatureSpec
12
+ from wedata.common.utils.feature_spec_utils import (
13
13
  COLUMN_INFO_TYPE_FEATURE,
14
14
  COLUMN_INFO_TYPE_ON_DEMAND,
15
15
  COLUMN_INFO_TYPE_SOURCE,
16
16
  get_feature_execution_groups,
17
17
  )
18
- from wedata.feature_store.utils.on_demand_utils import apply_functions_if_not_overridden
18
+ from wedata.common.utils.on_demand_utils import apply_functions_if_not_overridden
19
19
 
20
20
 
21
21
  class TrainingSet:
@@ -0,0 +1 @@
1
+ from .feast_client import FeastClient
@@ -20,6 +20,7 @@ import pytz
20
20
  from feast import FeatureStore, RepoConfig, FeatureView
21
21
  from pyspark.sql import DataFrame, SparkSession
22
22
  from wedata.feature_store.common.store_config.redis import RedisStoreConfig
23
+ from wedata.common.utils import env_utils
23
24
  from feast import Entity, FeatureService
24
25
  from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import SparkSource
25
26
  from feast.infra.online_stores.redis import RedisOnlineStore
@@ -37,8 +38,8 @@ TEMP_FILE_PATH = "/tmp/feast_data/"
37
38
  class FeastClient:
38
39
 
39
40
  def __init__(self, offline_store: SparkSession, online_store_config: RedisStoreConfig = None):
40
- project_id = os.getenv("WEDATA_PROJECT_ID", "")
41
- remote_path = os.getenv("FEAST_REMOTE_ADDRESS", "")
41
+ project_id = env_utils.get_project_id()
42
+ remote_path = env_utils.get_feast_remote_url()
42
43
  if offline_store is None or not isinstance(offline_store, SparkSession):
43
44
  raise ValueError("offline_store must be provided SparkSession instance")
44
45
 
@@ -146,8 +147,6 @@ class FeastClient:
146
147
  self._client.apply(feature_service)
147
148
  self._client.registry.delete_feature_view(feast_table_name, self._client.project)
148
149
 
149
-
150
-
151
150
  def get_feature_view(self, table_name: str):
152
151
  feast_table_name = translate_table_name_to_feast(table_name)
153
152
  return self._client.get_feature_view(feast_table_name)
@@ -0,0 +1 @@
1
+ from .logger import get_logger
@@ -0,0 +1,44 @@
1
+ import logging
2
+ import sys
3
+
4
+
5
+ class LoggerSingleton:
6
+ _instance = None
7
+
8
+ def __new__(cls):
9
+ if cls._instance is None:
10
+ cls._instance = super().__new__(cls)
11
+ cls._instance._initialize_logger()
12
+ return cls._instance
13
+
14
+ def _initialize_logger(self):
15
+ self.logger = logging.getLogger("wedata-feature-engineering")
16
+ self.logger.setLevel(logging.INFO)
17
+
18
+ # 清除已有的handler,避免重复添加
19
+ if self.logger.handlers:
20
+ self.logger.handlers.clear()
21
+
22
+ # 创建formatter,包含时间、文件名和行号
23
+ formatter = logging.Formatter(
24
+ fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
25
+ datefmt='%Y-%m-%d %H:%M:%S'
26
+ )
27
+
28
+ # 创建handler并输出到stdout
29
+ handler = logging.StreamHandler(sys.stdout)
30
+ handler.setFormatter(formatter)
31
+
32
+ self.logger.addHandler(handler)
33
+
34
+ # 防止消息传播到父级logger
35
+ self.logger.propagate = False
36
+
37
+ def get_logger(self, level=logging.INFO):
38
+ self.logger.setLevel(level)
39
+ return self.logger
40
+
41
+
42
+ def get_logger(level=logging.INFO):
43
+ """获取单例logger实例"""
44
+ return LoggerSingleton().get_logger(level)
@@ -0,0 +1 @@
1
+ from .spark_client import SparkClient
@@ -1,21 +1,20 @@
1
1
  from collections import defaultdict
2
- from typing import Optional, Any, Dict, List
2
+ from typing import Optional, Any, List
3
3
 
4
- import logging
5
4
  from pyspark.sql import SparkSession, DataFrame
6
5
  from pyspark.sql.catalog import Column
7
6
  from pyspark.sql.functions import when, isnull
8
7
  from pyspark.sql.types import StructType, StringType, StructField
9
8
  from mlflow.pyfunc import spark_udf
10
9
 
11
- from wedata.feature_store.constants.constants import (
10
+ from wedata.common.constants.constants import (
12
11
  _PREBUILT_ENV_URI
13
12
  )
14
13
 
15
- from wedata.feature_store.entities.feature import Feature
16
- from wedata.feature_store.entities.feature_table import FeatureTable
17
- from wedata.feature_store.entities.function_info import FunctionParameterInfo, FunctionInfo
18
- from wedata.feature_store.utils.common_utils import unsanitize_identifier, check_spark_table_exists, check_package_version
14
+ from wedata.common.entities.feature import Feature
15
+ from wedata.common.entities.feature_table import FeatureTable
16
+ from wedata.common.entities.function_info import FunctionParameterInfo, FunctionInfo
17
+ from wedata.common.utils.common_utils import unsanitize_identifier, check_spark_table_exists, check_package_version
19
18
 
20
19
 
21
20
  class SparkClient:
@@ -36,7 +35,6 @@ class SparkClient:
36
35
  catalog = self._spark.sql("SELECT current_catalog()").first()[0]
37
36
  return unsanitize_identifier(catalog)
38
37
 
39
-
40
38
  def get_current_database(self):
41
39
  """
42
40
  获取Spark上下文中当前设置的数据库名称
@@ -56,7 +54,6 @@ class SparkClient:
56
54
  def createDataFrame(self, data, schema) -> DataFrame:
57
55
  return self._spark.createDataFrame(data, schema)
58
56
 
59
-
60
57
  def read_table(self, table_name):
61
58
  """读取Spark表数据
62
59
 
@@ -14,15 +14,11 @@ from mlflow.store.artifact.models_artifact_repo import ModelsArtifactRepository
14
14
  from mlflow.store.artifact.runs_artifact_repo import RunsArtifactRepository
15
15
  from mlflow.utils import databricks_utils
16
16
 
17
- from wedata.feature_store.constants import constants
18
- from wedata.feature_store.constants.constants import MODEL_DATA_PATH_ROOT
17
+ from wedata.common.constants import constants
18
+ from wedata.common.constants.constants import MODEL_DATA_PATH_ROOT
19
+ from wedata.common.log import get_logger
19
20
  from pyspark.sql import SparkSession
20
21
 
21
- import logging
22
-
23
- # 配置日志(可选,根据实际情况配置)
24
- logging.basicConfig(level=logging.ERROR)
25
-
26
22
 
27
23
  def validate_table_name(name: str):
28
24
  """
@@ -57,14 +53,16 @@ def build_full_table_name(table_name: str, database_name: Optional[str] = None)
57
53
  """
58
54
 
59
55
  feature_store_database_name = os.environ.get("WEDATA_DEFAULT_FEATURE_STORE_DATABASE")
56
+ logger = get_logger()
60
57
  if database_name:
61
58
  feature_store_database_name = database_name
62
59
 
63
60
  if not feature_store_database_name:
64
- logging.error("The current user has not configured a default feature database. Please contact the manager account to configure it.")
61
+ logger.error("The current user has not configured a default feature database. "
62
+ "Please contact the manager account to configure it.")
65
63
  raise RuntimeError("Feature store is not configured! Please contact the main account to configure it.")
66
64
 
67
- logging.info("feature database:{}".format(feature_store_database_name))
65
+ logger.debug("feature database:{}".format(feature_store_database_name))
68
66
 
69
67
  feature_store_database = f"{feature_store_database_name}.{table_name}"
70
68
 
@@ -38,10 +38,10 @@ def get_region() -> str:
38
38
  获取当前地域
39
39
  """
40
40
  region_dlc = os.environ.get("DLC_REGION")
41
- region_emr = os.environ.get("EMR_REGION")
41
+ region_emr = os.environ.get("KERNEL_REGION")
42
42
  region = region_dlc if region_dlc else region_emr
43
43
  if not region:
44
- raise EnvironmentError("environment variable DLC_REGION or EMR_REGION is not set, "
44
+ raise EnvironmentError("environment variable DLC_REGION or KERNEL_REGION is not set, "
45
45
  "please check environment configuration")
46
46
  return region
47
47
 
@@ -68,24 +68,30 @@ def get_database_name(database_name: str) -> str:
68
68
  "please check environment configuration")
69
69
 
70
70
 
71
+ def set_default_database(database_name: str):
72
+ """
73
+ 设置默认数据库名称
74
+ """
75
+ if not isinstance(database_name, str):
76
+ raise ValueError("database_name must be a string")
77
+ os.environ["WEDATA_DEFAULT_FEATURE_STORE_DATABASE"] = database_name
78
+
79
+
71
80
  def get_engine_name() -> str:
72
81
  """
73
82
  获取引擎名称
74
83
  """
75
- engine_name = os.environ.get("KERNEL_ENGINE_NAME")
76
- if engine_name:
77
- return engine_name
78
- raise EnvironmentError("environment variable KERNEL_ENGINE_NAME is not set, please check environment configuration")
84
+ # 因为DLC有特殊,所以先判断DLC,如果没有再判断EMR
85
+ if get_engine_type() == "DLC":
86
+ return _get_variable("KERNEL_ENGINE")
87
+ return _get_variable("KERNEL_ENGINE_NAME")
79
88
 
80
89
 
81
90
  def get_engine_id() -> str:
82
91
  """
83
92
  获取引擎ID
84
93
  """
85
- engine_id = os.environ.get("KERNEL_ENGINE")
86
- if engine_id:
87
- return engine_id
88
- raise EnvironmentError("environment variable KERNEL_ENGINE is not set, please check environment configuration")
94
+ return _get_variable("KERNEL_ENGINE")
89
95
 
90
96
 
91
97
  def get_engine_type() -> str:
@@ -94,3 +100,18 @@ def get_engine_type() -> str:
94
100
  """
95
101
  return "DLC" if os.environ.get("DLC_REGION") else "EMR"
96
102
 
103
+
104
+ def get_feast_remote_url() -> str:
105
+ """
106
+ 获取Feast远程URL
107
+ """
108
+ return _get_variable("KERNEL_FEAST_REMOTE_ADDRESS")
109
+
110
+
111
+ def _get_variable(variable_key: str, is_raise: bool = True, default_value: str = None) -> str:
112
+ val = os.environ.get(variable_key, default_value)
113
+ if not val:
114
+ if is_raise:
115
+ raise EnvironmentError(f"environment variable {variable_key} is not set, "
116
+ f"please check environment configuration")
117
+ return val
@@ -11,13 +11,13 @@ from pyspark.sql import functions as F
11
11
  import pyspark.sql.functions as psf
12
12
 
13
13
 
14
- from wedata.feature_store.entities.environment_variables import BROADCAST_JOIN_THRESHOLD
15
- from wedata.feature_store.entities.feature_column_info import FeatureColumnInfo
16
- from wedata.feature_store.entities.feature_lookup import FeatureLookup
17
- from wedata.feature_store.entities.feature_spec import FeatureSpec
18
- from wedata.feature_store.entities.feature_table import FeatureTable
14
+ from wedata.common.entities.environment_variables import BROADCAST_JOIN_THRESHOLD
15
+ from wedata.common.entities.feature_column_info import FeatureColumnInfo
16
+ from wedata.common.entities.feature_lookup import FeatureLookup
17
+ from wedata.common.entities.feature_spec import FeatureSpec
18
+ from wedata.common.entities.feature_table import FeatureTable
19
19
 
20
- from wedata.feature_store.utils import uc_utils
20
+ from wedata.common.utils import uc_utils
21
21
 
22
22
  _logger = logging.getLogger(__name__)
23
23
 
@@ -4,14 +4,19 @@ from functools import reduce
4
4
  from typing import Dict, List, Tuple, Union
5
5
 
6
6
  import yaml
7
- from mlflow.utils.file_utils import YamlSafeDumper
8
-
9
- from wedata.feature_store.entities.column_info import ColumnInfo
10
- from wedata.feature_store.entities.feature_column_info import FeatureColumnInfo
11
- from wedata.feature_store.entities.feature_spec import FeatureSpec
12
- from wedata.feature_store.entities.on_demand_column_info import OnDemandColumnInfo
13
- from wedata.feature_store.entities.source_data_column_info import SourceDataColumnInfo
14
- from wedata.feature_store.utils.topological_sort import topological_sort
7
+
8
+ # MLflow 3.x compatibility: YamlSafeDumper was removed, use yaml.SafeDumper directly
9
+ try:
10
+ from mlflow.utils.file_utils import YamlSafeDumper
11
+ except ImportError:
12
+ YamlSafeDumper = yaml.SafeDumper
13
+
14
+ from wedata.common.entities.column_info import ColumnInfo
15
+ from wedata.common.entities.feature_column_info import FeatureColumnInfo
16
+ from wedata.common.entities.feature_spec import FeatureSpec
17
+ from wedata.common.entities.on_demand_column_info import OnDemandColumnInfo
18
+ from wedata.common.entities.source_data_column_info import SourceDataColumnInfo
19
+ from wedata.common.utils.topological_sort import topological_sort
15
20
 
16
21
  DEFAULT_GRAPH_DEPTH_LIMIT = 5
17
22
 
@@ -1,11 +1,11 @@
1
1
  import copy
2
2
  from typing import List, Union
3
3
 
4
- from wedata.feature_store.entities.feature_function import FeatureFunction
5
- from wedata.feature_store.entities.feature_lookup import FeatureLookup
6
- from wedata.feature_store.spark_client.spark_client import SparkClient
7
- from wedata.feature_store.utils import uc_utils
8
- from wedata.feature_store.utils.feature_lookup_utils import get_feature_lookups_with_full_table_names
4
+ from wedata.common.entities.feature_function import FeatureFunction
5
+ from wedata.common.entities.feature_lookup import FeatureLookup
6
+ from wedata.common.spark_client import SparkClient
7
+ from wedata.common.utils import uc_utils
8
+ from wedata.common.utils.feature_lookup_utils import get_feature_lookups_with_full_table_names
9
9
 
10
10
 
11
11
  def format_feature_lookups_and_functions(
@@ -4,10 +4,11 @@ from typing import Dict, List
4
4
  from pyspark.sql import DataFrame
5
5
  from pyspark.sql.functions import expr
6
6
 
7
- from wedata.feature_store.entities.feature_function import FeatureFunction
8
- from wedata.feature_store.entities.function_info import FunctionInfo
9
- from wedata.feature_store.entities.on_demand_column_info import OnDemandColumnInfo
10
- from wedata.feature_store.utils import common_utils, uc_utils
7
+ from wedata.common.entities.feature_function import FeatureFunction
8
+ from wedata.common.entities.function_info import FunctionInfo
9
+ from wedata.common.entities.on_demand_column_info import OnDemandColumnInfo
10
+ from wedata.common.utils import common_utils
11
+ from wedata.common.utils import uc_utils
11
12
 
12
13
 
13
14
  def _udf_expr(udf_name: str, arguments: List[str]) -> expr:
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
 
3
- from wedata.feature_store.constants.constants import _ERROR, _WARN
3
+ from wedata.common.constants.constants import _ERROR, _WARN
4
4
 
5
5
  _logger = logging.getLogger(__name__)
6
6
 
@@ -7,10 +7,10 @@ from mlflow.types import ColSpec
7
7
  from mlflow.types import DataType as MlflowDataType
8
8
  from mlflow.types import ParamSchema, Schema
9
9
 
10
- from wedata.feature_store.entities.feature_column_info import FeatureColumnInfo
11
- from wedata.feature_store.entities.feature_spec import FeatureSpec
12
- from wedata.feature_store.entities.on_demand_column_info import OnDemandColumnInfo
13
- from wedata.feature_store.entities.source_data_column_info import SourceDataColumnInfo
10
+ from wedata.common.entities.feature_column_info import FeatureColumnInfo
11
+ from wedata.common.entities.feature_spec import FeatureSpec
12
+ from wedata.common.entities.on_demand_column_info import OnDemandColumnInfo
13
+ from wedata.common.entities.source_data_column_info import SourceDataColumnInfo
14
14
 
15
15
  _logger = logging.getLogger(__name__)
16
16
 
@@ -4,19 +4,19 @@ from typing import Dict, List, Optional, Set
4
4
 
5
5
  from pyspark.sql import DataFrame
6
6
 
7
- from wedata.feature_store.entities.column_info import ColumnInfo
8
- from wedata.feature_store.entities.feature import Feature
9
- from wedata.feature_store.entities.feature_column_info import FeatureColumnInfo
10
- from wedata.feature_store.entities.feature_lookup import FeatureLookup
11
- from wedata.feature_store.entities.feature_spec import FeatureSpec
12
- from wedata.feature_store.entities.feature_table import FeatureTable
13
- from wedata.feature_store.entities.feature_table_info import FeatureTableInfo
14
- from wedata.feature_store.entities.function_info import FunctionInfo
15
- from wedata.feature_store.entities.on_demand_column_info import OnDemandColumnInfo
16
- from wedata.feature_store.entities.source_data_column_info import SourceDataColumnInfo
17
-
18
- from wedata.feature_store.utils import common_utils, validation_utils, uc_utils, schema_utils
19
- from wedata.feature_store.utils.feature_spec_utils import assign_topological_ordering
7
+ from wedata.common.entities.column_info import ColumnInfo
8
+ from wedata.common.entities.feature import Feature
9
+ from wedata.common.entities.feature_column_info import FeatureColumnInfo
10
+ from wedata.common.entities.feature_lookup import FeatureLookup
11
+ from wedata.common.entities.feature_spec import FeatureSpec
12
+ from wedata.common.entities.feature_table import FeatureTable
13
+ from wedata.common.entities.feature_table_info import FeatureTableInfo
14
+ from wedata.common.entities.function_info import FunctionInfo
15
+ from wedata.common.entities.on_demand_column_info import OnDemandColumnInfo
16
+ from wedata.common.entities.source_data_column_info import SourceDataColumnInfo
17
+
18
+ from wedata.common.utils import common_utils, validation_utils
19
+ from wedata.common.utils.feature_spec_utils import assign_topological_ordering
20
20
 
21
21
  _logger = logging.getLogger(__name__)
22
22
 
@@ -3,7 +3,7 @@ import re
3
3
  from typing import Optional, Set, Any, List
4
4
  from datetime import datetime, timezone
5
5
 
6
- from wedata.feature_store.entities.feature_spec import FeatureSpec
6
+ from wedata.common.entities.feature_spec import FeatureSpec
7
7
 
8
8
  SINGLE_LEVEL_NAMESPACE_REGEX = r"^[^\. \/\x00-\x1F\x7F]+$"
9
9
  TWO_LEVEL_NAMESPACE_REGEX = r"^[^\. \/\x00-\x1F\x7F]+(\.[^\. \/\x00-\x1F\x7F]+)$"
@@ -0,0 +1 @@
1
+ from .client import FeatureEngineeringClient