PyPI - tencent-wedata-feature-engineering-dev - Versions diffs - 0.1.48__py3-none-any.whl → 0.2.5__py3-none-any.whl - Mend

tencent-wedata-feature-engineering-dev 0.1.48__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64)

wedata/{feature_store → common}/entities/feature_lookup.py RENAMED Viewed

@@ -3,7 +3,7 @@ import datetime
 import logging
 from typing import Dict, List, Optional, Union
-from wedata.feature_store.utils import common_utils
+from wedata.common.utils import common_utils
 from wedata.feature_store.common.store_config.redis import RedisStoreConfig
 _logger = logging.getLogger(__name__)

wedata/{feature_store → common}/entities/feature_spec.py RENAMED Viewed

@@ -7,11 +7,11 @@ import mlflow
 from google.protobuf.json_format import MessageToDict, ParseDict
 from mlflow.utils.file_utils import TempDir, read_yaml, write_yaml
-from wedata.feature_store.common.protos import feature_store_pb2
-from wedata.feature_store.entities.column_info import ColumnInfo
-from wedata.feature_store.entities.feature_column_info import FeatureColumnInfo
-from wedata.feature_store.entities.function_info import FunctionInfo
-from wedata.feature_store.entities.feature_spec_constants import (
+from wedata.common.protos import feature_store_pb2
+from wedata.common.entities.column_info import ColumnInfo
+from wedata.common.entities.feature_column_info import FeatureColumnInfo
+from wedata.common.entities.function_info import FunctionInfo
+from wedata.common.entities.feature_spec_constants import (
     BOUND_TO,
     DATA_TYPE,
     FEATURE_COLUMN_INFO,
@@ -34,10 +34,10 @@ from wedata.feature_store.entities.feature_spec_constants import (
     TRAINING_DATA,
     UDF_NAME,
 )
-from wedata.feature_store.entities.feature_table_info import FeatureTableInfo
-from wedata.feature_store.entities.on_demand_column_info import OnDemandColumnInfo
-from wedata.feature_store.entities.source_data_column_info import SourceDataColumnInfo
-from wedata.feature_store.utils import common_utils
+from wedata.common.entities.feature_table_info import FeatureTableInfo
+from wedata.common.entities.on_demand_column_info import OnDemandColumnInfo
+from wedata.common.entities.source_data_column_info import SourceDataColumnInfo
+from wedata.common.utils import common_utils
 # Change log for serialization version. Please update for each serialization version.
 # 1. Initial.

wedata/{feature_store → common}/entities/feature_table_info.py RENAMED Viewed

@@ -1,7 +1,7 @@
 from typing import Optional
+from wedata.common.protos import feature_store_pb2
-from wedata.feature_store.common.protos import feature_store_pb2
 class FeatureTableInfo:
     def __init__(

wedata/{feature_store → common}/entities/function_info.py RENAMED Viewed

@@ -1,7 +1,8 @@
 from typing import List, Optional
-from wedata.feature_store.common.protos import feature_store_pb2
+from wedata.common.protos import feature_store_pb2
 class FunctionParameterInfo():
     def __init__(self, name: str, type_text: str):

wedata/{feature_store → common}/entities/on_demand_column_info.py RENAMED Viewed

@@ -1,6 +1,7 @@
 from typing import Dict
-from wedata.feature_store.common.protos import feature_store_pb2
+from wedata.common.protos import feature_store_pb2
 class OnDemandColumnInfo:
     def __init__(

wedata/{feature_store → common}/entities/source_data_column_info.py RENAMED Viewed

@@ -1,4 +1,6 @@
-from wedata.feature_store.common.protos import feature_store_pb2
+from wedata.common.protos import feature_store_pb2
 class SourceDataColumnInfo:
     def __init__(self, name: str):
         if not name:

wedata/{feature_store → common}/entities/training_set.py RENAMED Viewed

@@ -2,20 +2,20 @@ from typing import Dict, List, Optional
 from pyspark.sql import DataFrame
-from wedata.feature_store.entities.feature_table import FeatureTable
-from wedata.feature_store.entities.function_info import FunctionInfo
-from wedata.feature_store.utils.feature_lookup_utils import (
+from wedata.common.entities.feature_table import FeatureTable
+from wedata.common.entities.function_info import FunctionInfo
+from wedata.common.utils.feature_lookup_utils import (
     join_feature_data_if_not_overridden,
 )
-from wedata.feature_store.entities.feature_spec import FeatureSpec
-from wedata.feature_store.utils.feature_spec_utils import (
+from wedata.common.entities.feature_spec import FeatureSpec
+from wedata.common.utils.feature_spec_utils import (
     COLUMN_INFO_TYPE_FEATURE,
     COLUMN_INFO_TYPE_ON_DEMAND,
     COLUMN_INFO_TYPE_SOURCE,
     get_feature_execution_groups,
 )
-from wedata.feature_store.utils.on_demand_utils import apply_functions_if_not_overridden
+from wedata.common.utils.on_demand_utils import apply_functions_if_not_overridden
 class TrainingSet:

wedata/common/feast_client/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+from .feast_client import FeastClient

wedata/{feature_store → common}/feast_client/feast_client.py RENAMED Viewed

@@ -20,7 +20,7 @@ import pytz
 from feast import FeatureStore, RepoConfig, FeatureView
 from pyspark.sql import DataFrame, SparkSession
 from wedata.feature_store.common.store_config.redis import RedisStoreConfig
-from wedata.feature_store.utils import env_utils
+from wedata.common.utils import env_utils
 from feast import Entity, FeatureService
 from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import SparkSource
 from feast.infra.online_stores.redis import RedisOnlineStore

wedata/common/log/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+from .logger import get_logger

wedata/{feature_store/common → common}/log/logger.py RENAMED Viewed

@@ -14,14 +14,14 @@ class LoggerSingleton:
     def _initialize_logger(self):
         self.logger = logging.getLogger("wedata-feature-engineering")
         self.logger.setLevel(logging.INFO)
         # 清除已有的handler，避免重复添加
         if self.logger.handlers:
             self.logger.handlers.clear()
         # 创建formatter，包含时间、文件名和行号
         formatter = logging.Formatter(
-            fmt='%(asctime)s - %(name)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s',
+            fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
             datefmt='%Y-%m-%d %H:%M:%S'
         )
@@ -30,11 +30,15 @@ class LoggerSingleton:
         handler.setFormatter(formatter)
         self.logger.addHandler(handler)
+        # 防止消息传播到父级logger
+        self.logger.propagate = False
-    def get_logger(self):
+    def get_logger(self, level=logging.INFO):
+        self.logger.setLevel(level)
         return self.logger
-def get_logger():
+def get_logger(level=logging.INFO):
     """获取单例logger实例"""
-    return LoggerSingleton().get_logger()
+    return LoggerSingleton().get_logger(level)

wedata/common/spark_client/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+from .spark_client import SparkClient

wedata/{feature_store → common}/spark_client/spark_client.py RENAMED Viewed

@@ -1,21 +1,20 @@
 from collections import defaultdict
-from typing import Optional, Any, Dict, List
+from typing import Optional, Any, List
-import logging
 from pyspark.sql import SparkSession, DataFrame
 from pyspark.sql.catalog import Column
 from pyspark.sql.functions import when, isnull
 from pyspark.sql.types import StructType, StringType, StructField
 from mlflow.pyfunc import spark_udf
-from wedata.feature_store.constants.constants import (
+from wedata.common.constants.constants import (
     _PREBUILT_ENV_URI
 )
-from wedata.feature_store.entities.feature import Feature
-from wedata.feature_store.entities.feature_table import FeatureTable
-from wedata.feature_store.entities.function_info import FunctionParameterInfo, FunctionInfo
-from wedata.feature_store.utils.common_utils import unsanitize_identifier, check_spark_table_exists, check_package_version
+from wedata.common.entities.feature import Feature
+from wedata.common.entities.feature_table import FeatureTable
+from wedata.common.entities.function_info import FunctionParameterInfo, FunctionInfo
+from wedata.common.utils.common_utils import unsanitize_identifier, check_spark_table_exists, check_package_version
 class SparkClient:

wedata/{feature_store → common}/utils/common_utils.py RENAMED Viewed

@@ -14,15 +14,11 @@ from mlflow.store.artifact.models_artifact_repo import ModelsArtifactRepository
 from mlflow.store.artifact.runs_artifact_repo import RunsArtifactRepository
 from mlflow.utils import databricks_utils
-from wedata.feature_store.constants import constants
-from wedata.feature_store.constants.constants import MODEL_DATA_PATH_ROOT
+from wedata.common.constants import constants
+from wedata.common.constants.constants import MODEL_DATA_PATH_ROOT
+from wedata.common.log import get_logger
 from pyspark.sql import SparkSession
-import logging
-# 配置日志（可选，根据实际情况配置）
-logging.basicConfig(level=logging.ERROR)
 def validate_table_name(name: str):
     """
@@ -57,14 +53,16 @@ def build_full_table_name(table_name: str, database_name: Optional[str] = None)
     """
     feature_store_database_name = os.environ.get("WEDATA_DEFAULT_FEATURE_STORE_DATABASE")
+    logger = get_logger()
     if database_name:
         feature_store_database_name = database_name
     if not feature_store_database_name:
-        logging.error("The current user has not configured a default feature database. Please contact the manager account to configure it.")
+        logger.error("The current user has not configured a default feature database. "
+                     "Please contact the manager account to configure it.")
         raise RuntimeError("Feature store is not configured! Please contact the main account to configure it.")
-    logging.info("feature database:{}".format(feature_store_database_name))
+    logger.debug("feature database:{}".format(feature_store_database_name))
     feature_store_database = f"{feature_store_database_name}.{table_name}"

wedata/{feature_store → common}/utils/env_utils.py RENAMED Viewed

@@ -68,10 +68,22 @@ def get_database_name(database_name: str) -> str:
                            "please check environment configuration")
+def set_default_database(database_name: str):
+    """
+    设置默认数据库名称
+    """
+    if not isinstance(database_name, str):
+        raise ValueError("database_name must be a string")
+    os.environ["WEDATA_DEFAULT_FEATURE_STORE_DATABASE"] = database_name
 def get_engine_name() -> str:
     """
     获取引擎名称
     """
+    # 因为DLC有特殊，所以先判断DLC，如果没有再判断EMR
+    if get_engine_type() == "DLC":
+        return _get_variable("KERNEL_ENGINE")
     return _get_variable("KERNEL_ENGINE_NAME")

wedata/{feature_store → common}/utils/feature_lookup_utils.py RENAMED Viewed

@@ -11,13 +11,13 @@ from pyspark.sql import functions as F
 import pyspark.sql.functions as psf
-from wedata.feature_store.entities.environment_variables import BROADCAST_JOIN_THRESHOLD
-from wedata.feature_store.entities.feature_column_info import FeatureColumnInfo
-from wedata.feature_store.entities.feature_lookup import FeatureLookup
-from wedata.feature_store.entities.feature_spec import FeatureSpec
-from wedata.feature_store.entities.feature_table import FeatureTable
+from wedata.common.entities.environment_variables import BROADCAST_JOIN_THRESHOLD
+from wedata.common.entities.feature_column_info import FeatureColumnInfo
+from wedata.common.entities.feature_lookup import FeatureLookup
+from wedata.common.entities.feature_spec import FeatureSpec
+from wedata.common.entities.feature_table import FeatureTable
-from wedata.feature_store.utils import uc_utils
+from wedata.common.utils import uc_utils
 _logger = logging.getLogger(__name__)

wedata/{feature_store → common}/utils/feature_spec_utils.py RENAMED Viewed

@@ -4,14 +4,19 @@ from functools import reduce
 from typing import Dict, List, Tuple, Union
 import yaml
-from mlflow.utils.file_utils import YamlSafeDumper
-from wedata.feature_store.entities.column_info import ColumnInfo
-from wedata.feature_store.entities.feature_column_info import FeatureColumnInfo
-from wedata.feature_store.entities.feature_spec import FeatureSpec
-from wedata.feature_store.entities.on_demand_column_info import OnDemandColumnInfo
-from wedata.feature_store.entities.source_data_column_info import SourceDataColumnInfo
-from wedata.feature_store.utils.topological_sort import topological_sort
+# MLflow 3.x compatibility: YamlSafeDumper was removed, use yaml.SafeDumper directly
+try:
+    from mlflow.utils.file_utils import YamlSafeDumper
+except ImportError:
+    YamlSafeDumper = yaml.SafeDumper
+from wedata.common.entities.column_info import ColumnInfo
+from wedata.common.entities.feature_column_info import FeatureColumnInfo
+from wedata.common.entities.feature_spec import FeatureSpec
+from wedata.common.entities.on_demand_column_info import OnDemandColumnInfo
+from wedata.common.entities.source_data_column_info import SourceDataColumnInfo
+from wedata.common.utils.topological_sort import topological_sort
 DEFAULT_GRAPH_DEPTH_LIMIT = 5

wedata/{feature_store → common}/utils/feature_utils.py RENAMED Viewed

@@ -1,11 +1,11 @@
 import copy
 from typing import List, Union
-from wedata.feature_store.entities.feature_function import FeatureFunction
-from wedata.feature_store.entities.feature_lookup import FeatureLookup
-from wedata.feature_store.spark_client.spark_client import SparkClient
-from wedata.feature_store.utils import uc_utils
-from wedata.feature_store.utils.feature_lookup_utils import get_feature_lookups_with_full_table_names
+from wedata.common.entities.feature_function import FeatureFunction
+from wedata.common.entities.feature_lookup import FeatureLookup
+from wedata.common.spark_client import SparkClient
+from wedata.common.utils import uc_utils
+from wedata.common.utils.feature_lookup_utils import get_feature_lookups_with_full_table_names
 def format_feature_lookups_and_functions(

wedata/{feature_store → common}/utils/on_demand_utils.py RENAMED Viewed

@@ -4,10 +4,11 @@ from typing import Dict, List
 from pyspark.sql import DataFrame
 from pyspark.sql.functions import expr
-from wedata.feature_store.entities.feature_function import FeatureFunction
-from wedata.feature_store.entities.function_info import FunctionInfo
-from wedata.feature_store.entities.on_demand_column_info import OnDemandColumnInfo
-from wedata.feature_store.utils import common_utils, uc_utils
+from wedata.common.entities.feature_function import FeatureFunction
+from wedata.common.entities.function_info import FunctionInfo
+from wedata.common.entities.on_demand_column_info import OnDemandColumnInfo
+from wedata.common.utils import common_utils
+from wedata.common.utils import uc_utils
 def _udf_expr(udf_name: str, arguments: List[str]) -> expr:

wedata/{feature_store → common}/utils/schema_utils.py RENAMED Viewed

@@ -1,6 +1,6 @@
 import logging
-from wedata.feature_store.constants.constants import _ERROR, _WARN
+from wedata.common.constants.constants import _ERROR, _WARN
 _logger = logging.getLogger(__name__)

wedata/{feature_store → common}/utils/signature_utils.py RENAMED Viewed

@@ -7,10 +7,10 @@ from mlflow.types import ColSpec
 from mlflow.types import DataType as MlflowDataType
 from mlflow.types import ParamSchema, Schema
-from wedata.feature_store.entities.feature_column_info import FeatureColumnInfo
-from wedata.feature_store.entities.feature_spec import FeatureSpec
-from wedata.feature_store.entities.on_demand_column_info import OnDemandColumnInfo
-from wedata.feature_store.entities.source_data_column_info import SourceDataColumnInfo
+from wedata.common.entities.feature_column_info import FeatureColumnInfo
+from wedata.common.entities.feature_spec import FeatureSpec
+from wedata.common.entities.on_demand_column_info import OnDemandColumnInfo
+from wedata.common.entities.source_data_column_info import SourceDataColumnInfo
 _logger = logging.getLogger(__name__)

wedata/{feature_store → common}/utils/training_set_utils.py RENAMED Viewed

@@ -4,19 +4,19 @@ from typing import Dict, List, Optional, Set
 from pyspark.sql import DataFrame
-from wedata.feature_store.entities.column_info import ColumnInfo
-from wedata.feature_store.entities.feature import Feature
-from wedata.feature_store.entities.feature_column_info import FeatureColumnInfo
-from wedata.feature_store.entities.feature_lookup import FeatureLookup
-from wedata.feature_store.entities.feature_spec import FeatureSpec
-from wedata.feature_store.entities.feature_table import FeatureTable
-from wedata.feature_store.entities.feature_table_info import FeatureTableInfo
-from wedata.feature_store.entities.function_info import FunctionInfo
-from wedata.feature_store.entities.on_demand_column_info import OnDemandColumnInfo
-from wedata.feature_store.entities.source_data_column_info import SourceDataColumnInfo
-from wedata.feature_store.utils import common_utils, validation_utils, uc_utils, schema_utils
-from wedata.feature_store.utils.feature_spec_utils import assign_topological_ordering
+from wedata.common.entities.column_info import ColumnInfo
+from wedata.common.entities.feature import Feature
+from wedata.common.entities.feature_column_info import FeatureColumnInfo
+from wedata.common.entities.feature_lookup import FeatureLookup
+from wedata.common.entities.feature_spec import FeatureSpec
+from wedata.common.entities.feature_table import FeatureTable
+from wedata.common.entities.feature_table_info import FeatureTableInfo
+from wedata.common.entities.function_info import FunctionInfo
+from wedata.common.entities.on_demand_column_info import OnDemandColumnInfo
+from wedata.common.entities.source_data_column_info import SourceDataColumnInfo
+from wedata.common.utils import common_utils, validation_utils
+from wedata.common.utils.feature_spec_utils import assign_topological_ordering
 _logger = logging.getLogger(__name__)

wedata/{feature_store → common}/utils/uc_utils.py RENAMED Viewed

@@ -3,7 +3,7 @@ import re
 from typing import Optional, Set, Any, List
 from datetime import datetime, timezone
-from wedata.feature_store.entities.feature_spec import FeatureSpec
+from wedata.common.entities.feature_spec import FeatureSpec
 SINGLE_LEVEL_NAMESPACE_REGEX = r"^[^\. \/\x00-\x1F\x7F]+$"
 TWO_LEVEL_NAMESPACE_REGEX = r"^[^\. \/\x00-\x1F\x7F]+(\.[^\. \/\x00-\x1F\x7F]+)$"

wedata/feature_engineering/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+from .client import FeatureEngineeringClient

tencent-wedata-feature-engineering-dev 0.1.48__py3-none-any.whl → 0.2.5__py3-none-any.whl

tencent-wedata-feature-engineering-dev 0.1.48__py3-none-any.whl → 0.2.5__py3-none-any.whl