tencent-wedata-feature-engineering-dev 0.1.49__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to the registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Potentially problematic release: this version of tencent-wedata-feature-engineering-dev has been flagged as potentially problematic.
- {tencent_wedata_feature_engineering_dev-0.1.49.dist-info → tencent_wedata_feature_engineering_dev-0.2.0.dist-info}/METADATA +10 -8
- tencent_wedata_feature_engineering_dev-0.2.0.dist-info/RECORD +46 -0
- {tencent_wedata_feature_engineering_dev-0.1.49.dist-info → tencent_wedata_feature_engineering_dev-0.2.0.dist-info}/WHEEL +1 -1
- wedata/feature_store/client.py +28 -92
- wedata/feature_store/constants/constants.py +2 -5
- wedata/feature_store/entities/feature_lookup.py +0 -17
- wedata/feature_store/entities/feature_spec.py +2 -2
- wedata/feature_store/entities/feature_table.py +1 -5
- wedata/feature_store/entities/function_info.py +4 -1
- wedata/feature_store/feature_table_client/feature_table_client.py +53 -528
- wedata/feature_store/spark_client/spark_client.py +15 -41
- wedata/feature_store/training_set_client/training_set_client.py +10 -9
- wedata/feature_store/utils/common_utils.py +4 -48
- wedata/feature_store/utils/feature_lookup_utils.py +43 -37
- wedata/feature_store/utils/feature_spec_utils.py +1 -1
- wedata/feature_store/utils/uc_utils.py +1 -1
- tencent_wedata_feature_engineering_dev-0.1.49.dist-info/RECORD +0 -66
- wedata/feature_store/cloud_sdk_client/__init__.py +0 -0
- wedata/feature_store/cloud_sdk_client/client.py +0 -108
- wedata/feature_store/cloud_sdk_client/models.py +0 -686
- wedata/feature_store/cloud_sdk_client/utils.py +0 -39
- wedata/feature_store/common/log/__init__.py +0 -0
- wedata/feature_store/common/log/logger.py +0 -40
- wedata/feature_store/common/store_config/__init__.py +0 -0
- wedata/feature_store/common/store_config/redis.py +0 -48
- wedata/feature_store/constants/engine_types.py +0 -34
- wedata/feature_store/feast_client/__init__.py +0 -0
- wedata/feature_store/feast_client/feast_client.py +0 -487
- wedata/feature_store/utils/env_utils.py +0 -108
- wedata/tempo/__init__.py +0 -0
- wedata/tempo/interpol.py +0 -448
- wedata/tempo/intervals.py +0 -1331
- wedata/tempo/io.py +0 -61
- wedata/tempo/ml.py +0 -129
- wedata/tempo/resample.py +0 -318
- wedata/tempo/tsdf.py +0 -1720
- wedata/tempo/utils.py +0 -254
- {tencent_wedata_feature_engineering_dev-0.1.49.dist-info → tencent_wedata_feature_engineering_dev-0.2.0.dist-info}/top_level.txt +0 -0
wedata/feature_store/spark_client/spark_client.py
@@ -1,7 +1,6 @@
 from collections import defaultdict
 from typing import Optional, Any, Dict, List
 
-import logging
 from pyspark.sql import SparkSession, DataFrame
 from pyspark.sql.catalog import Column
 from pyspark.sql.functions import when, isnull
@@ -15,7 +14,7 @@ from wedata.feature_store.constants.constants import (
 from wedata.feature_store.entities.feature import Feature
 from wedata.feature_store.entities.feature_table import FeatureTable
 from wedata.feature_store.entities.function_info import FunctionParameterInfo, FunctionInfo
-from wedata.feature_store.utils.common_utils import unsanitize_identifier
+from wedata.feature_store.utils.common_utils import unsanitize_identifier
 
 
 class SparkClient:
@@ -29,12 +28,10 @@ class SparkClient:
         Returns:
             str: the current catalog name, or None if not set
         """
-
-        if ok:
+        try:
             return unsanitize_identifier(self._spark.catalog.currentCatalog())
-
-
-            return unsanitize_identifier(catalog)
+        except Exception:
+            return None
 
     def get_current_database(self):
         """
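Editor's note: the net effect of this hunk is that `get_current_catalog` now probes `spark.catalog.currentCatalog()` directly and degrades to `None` instead of branching on a precomputed flag. A minimal sketch of the new control flow (the full method body is not shown in the hunk, so the standalone function below is an assumption, and the `unsanitize_identifier` wrapper is omitted):

```python
from pyspark.sql import SparkSession

def get_current_catalog(spark: SparkSession):
    """Return the current catalog name, or None if it cannot be determined."""
    try:
        # Catalog.currentCatalog() exists in pyspark >= 3.4; on older versions
        # the resulting AttributeError is swallowed by the broad except below.
        return spark.catalog.currentCatalog()
    except Exception:
        return None
```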
@@ -52,9 +49,13 @@ class SparkClient:
         # Catch all exceptions and return None
         return None
 
+
+
+
     def createDataFrame(self, data, schema) -> DataFrame:
         return self._spark.createDataFrame(data, schema)
 
+
     def read_table(self, table_name):
         """Read Spark table data
 
@@ -67,28 +68,20 @@ class SparkClient:
         Raises:
             ValueError: raised when the table does not exist or the read fails
         """
-        table_name = _translate_spark_table_name(table_name)
         try:
             # Verify that the table exists
-            if not
+            if not self._spark.catalog.tableExists(table_name):
                 raise ValueError(f"Table does not exist: {table_name}")
+
             return self._spark.table(table_name)
 
         except Exception as e:
             raise ValueError(f"Failed to read table {table_name}: {str(e)}")
 
+
     def get_features(self, table_name):
         # Query column information
-
-        split = table_name.split(".")
-        if len(split) == 2:
-            # db.table_name
-            columns = self._spark.catalog.listColumns(tableName=split[1], dbName=split[0])
-        elif len(split) == 3:
-            # catalog.db.table_name
-            columns = self._spark.catalog.listColumns(tableName=split[2], dbName=split[1])
-        else:
-            columns = self._spark.catalog.listColumns(tableName=table_name)
+        columns = self._spark.catalog.listColumns(tableName=table_name)
         return [
             Feature(
                 feature_table=table_name,
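The simplification in `get_features` leans on the fact that recent PySpark (3.4 and later) resolves fully qualified `db.table` or `catalog.db.table` names in `Catalog.tableExists` and `Catalog.listColumns`, so the manual name splitting is no longer needed. A quick self-contained check of that assumption (table and database names here are illustrative):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
spark.sql("CREATE DATABASE IF NOT EXISTS demo_db")
spark.sql("CREATE TABLE IF NOT EXISTS demo_db.demo_tbl (id INT, name STRING) USING parquet")

# Qualified names are resolved without splitting into (dbName, tableName).
print(spark.catalog.tableExists("demo_db.demo_tbl"))
for col in spark.catalog.listColumns(tableName="demo_db.demo_tbl"):
    print(col.name, col.dataType, col.isPartition)
```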
@@ -100,19 +93,15 @@ class SparkClient:
         ]
 
     def get_feature_table(self, table_name):
-
-
-        EMR supports table_name in the form schema.table
-        """
+        # Fetch table metadata
+        table = self._spark.catalog.getTable(table_name)
 
-        table_name = _translate_spark_table_name(table_name)
-        # table = self._spark.catalog.getTable(table_name)
         # Fetch table configuration
         properties = self._spark.sql(f"SHOW TBLPROPERTIES {table_name}").collect()
         primary_key_str = next((row.value for row in properties if row.key == "primaryKeys"), None)
         primary_keys = primary_key_str.split(",") if primary_key_str else []
         table_id = next((row.value for row in properties if row.key == "table_id"), table_name)
-        description = next((row.value for row in properties if row.key == "comment"), None)
+        description = table.description or next((row.value for row in properties if row.key == "comment"), None)
         timestamp_keys_str = next((row.value for row in properties if row.key == "timestampKeys"), None)
         timestamp_keys = timestamp_keys_str.split(",") if timestamp_keys_str else []
         # Fetch partition column information
@@ -269,18 +258,3 @@ class SparkClient:
             kwargs[_PREBUILT_ENV_URI] = prebuilt_env_uri
 
         return spark_udf(self._spark, model_uri, **kwargs)
-
-
-def _translate_spark_table_name(table_name):
-    from wedata.feature_store.constants.engine_types import judge_engine_type, CalculateEngineTypes
-    # Fetch table metadata
-    if judge_engine_type() == CalculateEngineTypes.EMR:
-        split_names = table_name.split(".")
-        # print(f"==== EMR TABLE split len({len(split_names)})")
-        if len(split_names) <= 2:
-            return table_name
-        else:
-            table_name = ".".join(table_name.split(".")[1:])
-            return table_name
-    return table_name
-
wedata/feature_store/training_set_client/training_set_client.py
@@ -309,7 +309,16 @@ class TrainingSetClient:
         feature_spec.save(data_path)
 
         print(f'artifact_path:{artifact_path},data_path:{data_path},conda_env:{conda_env},'
-              f'signature:{signature},input_example:{input_example}')
+              f'signature:{signature},input_example:{input_example}')
+
+        # Set the instance ID tag
+        tdlc_home = os.getenv('TDLC_HOME', '')
+        if tdlc_home:
+            instance_id = tdlc_home.split('/')[-3]
+            mlflow.set_tag("instance_id", instance_id)
+
+        # Set the experiment tag; required for lineage queries
+        mlflow.set_experiment_tag(f"instance_id_{instance_id}", instance_id)
 
         mlflow.pyfunc.log_model(
             artifact_path=artifact_path,
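One hazard in the added block: `instance_id` is only bound inside the `if tdlc_home:` guard, yet `mlflow.set_experiment_tag(...)` references it unconditionally, so the method raises `NameError` whenever `TDLC_HOME` is unset. A defensive variant (reviewer sketch, not package code) keeps both tags behind the same check:

```python
import os
import mlflow

# Sketch only: both tags are set only when TDLC_HOME is available.
tdlc_home = os.getenv("TDLC_HOME", "")
if tdlc_home:
    instance_id = tdlc_home.split("/")[-3]
    mlflow.set_tag("instance_id", instance_id)
    # Experiment tag is required for lineage queries, per the inline comment.
    mlflow.set_experiment_tag(f"instance_id_{instance_id}", instance_id)
```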
@@ -360,7 +369,6 @@ class TrainingSetClient:
         env_manager: Optional[str] = None,
         local_uri: Optional[str] = None,
         params: Optional[dict[str, Any]] = None,
-        timestamp_key: str = None,
         **kwargs,
     ) -> DataFrame:
         # TODO: ML, to be confirmed whether this is needed
@@ -424,11 +432,8 @@ class TrainingSetClient:
 
         # Validate that columns needed for joining feature tables exist and are not duplicates.
         feature_input_keys = []
-        print("====>timestamp_key:", timestamp_key)
         for fci in feature_spec.feature_column_infos:
-            print("====>fci:", fci.lookup_key)
             feature_input_keys.extend([k for k in fci.lookup_key])
-
        on_demand_input_names = uc_utils.get_unique_list_order(
             [
                 input_name
@@ -442,16 +447,12 @@ class TrainingSetClient:
         ]
         # print(f"wedata source_data_names:{source_data_names}")
 
-        print("===>source_data_names:", source_data_names)
-
         feature_output_names = [
             fci.output_name for fci in feature_spec.feature_column_infos
         ]
-        print("====>feature_output_names:", feature_output_names)
         on_demand_output_names = [
             odci.output_name for odci in feature_spec.on_demand_column_infos
         ]
-        print("====>on_demand_output_names:", on_demand_output_names)
         all_output_names = set(
             source_data_names + feature_output_names + on_demand_output_names
         )
wedata/feature_store/utils/common_utils.py
@@ -2,12 +2,14 @@
 Common utility functions
 """
 import os
+import json
 from collections import Counter
 from datetime import datetime, timezone
 from functools import wraps
 from typing import Any, Dict, List, Optional
 from urllib.parse import urlparse
 
+import mlflow
 from mlflow.exceptions import RestException
 from mlflow.store.artifact.artifact_repository_registry import get_artifact_repository
 from mlflow.store.artifact.models_artifact_repo import ModelsArtifactRepository
@@ -16,7 +18,6 @@ from mlflow.utils import databricks_utils
 
 from wedata.feature_store.constants import constants
 from wedata.feature_store.constants.constants import MODEL_DATA_PATH_ROOT
-from pyspark.sql import SparkSession
 
 import logging
 
@@ -116,7 +117,7 @@ def _get_dbutils():
 def utc_timestamp_ms_from_iso_datetime_string(date_string: str) -> int:
     dt = datetime.fromisoformat(date_string)
     utc_dt = dt.replace(tzinfo=timezone.utc)
-    return
+    return 1000 * utc_dt.timestamp()
 
 
 def pip_depependency_pinned_major_version(pip_package_name, major_version):
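This fixes a real bug: as rendered here, the old line was a bare `return`, so the helper yielded `None` despite its `-> int` annotation. Note that even the fixed version returns a `float`, since `datetime.timestamp()` is a float. A quick check of the intended semantics:

```python
from datetime import datetime, timezone

def utc_timestamp_ms_from_iso_datetime_string(date_string: str) -> int:
    dt = datetime.fromisoformat(date_string)
    utc_dt = dt.replace(tzinfo=timezone.utc)
    return 1000 * utc_dt.timestamp()

print(utc_timestamp_ms_from_iso_datetime_string("2024-01-01T00:00:00"))
# 1704067200000.0, a float; int(...) would be needed to honor the annotation.
```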
@@ -303,49 +304,4 @@ def validate_database(database_name):
         database_name = os.environ.get(constants.WEDATA_DEFAULT_FEATURE_STORE_DATABASE)
         if database_name is None:
             raise ValueError("Database_name variable or default database is not set.")
-    return True
-
-
-def check_package_version(package_name, expected_version, op="=="):
-    """
-    Check whether the installed version of the given package satisfies the expected version.
-    Args:
-        package_name: package name
-        expected_version: expected version requirement, e.g. 3.5.5
-        op: comparison operator, defaults to "=="
-    Returns:
-        (package found, version matches, installed version)
-        If satisfied, returns (True, True, installed_version); otherwise returns (True, False, installed_version).
-        If the package does not exist, returns (False, False, None).
-    """
-    # Added at the top of the script
-    from packaging import version
-    import importlib.metadata
-    try:
-        installed_version = importlib.metadata.version(package_name)
-
-        if not op:
-            raise ValueError(f"Invalid op: {op}. need be in ['==', '>', '<', '>=', '<=', '!=', '~=']")
-        # Supports version-range checks (e.g. ">=2.0,<3.0")
-        # Uses packaging.version for complex version checks
-        i = version.parse(installed_version)
-        e = version.parse(expected_version)
-        return True, eval(f"i{op}e"), installed_version
-
-    except importlib.metadata.PackageNotFoundError:
-        return False, False, None
-
-
-def check_spark_table_exists(spark_client: SparkSession, full_table_name: str) -> bool:
-    _, ok, _ = check_package_version("pyspark", "3.5.0", ">=")
-    try:
-        return spark_client.catalog.tableExists(full_table_name)
-    except AttributeError:
-        split = full_table_name.split(".")
-        if len(split) == 2:
-            query = f"SHOW TABLES IN {split[0]} LIKE '{split[1]}'"
-        elif len(split) == 3:
-            query = f"SHOW TABLES IN {split[1]} LIKE '{split[2]}'"
-        else:
-            query = f"SHOW TABLES LIKE '{full_table_name}'"
-        return spark_client.sql(query).count() > 0
+    return True
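The deleted `check_package_version` built its comparison with `eval(f"i{op}e")`, and `check_spark_table_exists` computed a version flag it never used. If a version gate is ever reintroduced, the `packaging` library's specifier API does the same job without `eval`. A sketch only; none of these names exist in 0.2.0:

```python
import importlib.metadata
from packaging.specifiers import SpecifierSet

def package_version_satisfies(package_name: str, specifier: str) -> bool:
    """True if the installed package matches a PEP 440 specifier, e.g. '>=3.5.0'."""
    try:
        installed = importlib.metadata.version(package_name)
    except importlib.metadata.PackageNotFoundError:
        return False
    # SpecifierSet handles ranges such as ">=2.0,<3.0" as well.
    return installed in SpecifierSet(specifier)

print(package_version_satisfies("packaging", ">=20.0"))
```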
wedata/feature_store/utils/feature_lookup_utils.py
@@ -6,10 +6,9 @@ from collections import defaultdict
 from functools import reduce
 from typing import Dict, List, Optional, Tuple
 
-from pyspark.sql import DataFrame
+from pyspark.sql import DataFrame, Window
 from pyspark.sql import functions as F
-
-
+from pyspark.sql.functions import sum, unix_timestamp
 
 from wedata.feature_store.entities.environment_variables import BROADCAST_JOIN_THRESHOLD
 from wedata.feature_store.entities.feature_column_info import FeatureColumnInfo
@@ -17,7 +16,7 @@ from wedata.feature_store.entities.feature_lookup import FeatureLookup
 from wedata.feature_store.entities.feature_spec import FeatureSpec
 from wedata.feature_store.entities.feature_table import FeatureTable
 
-from wedata.feature_store.utils import uc_utils
+from wedata.feature_store.utils import common_utils, validation_utils, uc_utils
 
 _logger = logging.getLogger(__name__)
 
@@ -75,21 +74,19 @@ def _spark_asof_join_features(
     else:
         joined_df = _spark_asof_join_features_tempo(
             df=df,
-
-
-
-            lookback_window=lookback_window_seconds,
+            df_lookup_keys=df_lookup_keys,
+            df_timestamp_lookup_key=df_timestamp_lookup_key,
+            feature_and_keys=feature_and_keys,
             ft_features=ft_features,
+            lookback_window_seconds=lookback_window_seconds,
         )
     return joined_df
 
-
 def _spark_asof_join_features_tempo(
     df: DataFrame,
     feature_df: DataFrame,
     lookup_keys: List[str],
     timestamp_key: str,
-    ft_features: List[Tuple[str, str]],
     lookback_window: Optional[float] = None
 ) -> DataFrame:
     """
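Reviewer note: the keyword arguments passed at the call site above (`df_lookup_keys`, `df_timestamp_lookup_key`, `feature_and_keys`, `ft_features`, `lookback_window_seconds`) do not match the new signature of `_spark_asof_join_features_tempo` (`df`, `feature_df`, `lookup_keys`, `timestamp_key`, `lookback_window`), so this branch would raise `TypeError` if exercised. A minimal reproduction of the mismatch with a hypothetical stub:

```python
def tempo_join(df, feature_df, lookup_keys, timestamp_key, lookback_window=None):
    """Stub with the new signature shown in this diff."""

try:
    tempo_join(
        df=None,
        df_lookup_keys=["uid"],
        df_timestamp_lookup_key="ts",
        feature_and_keys=None,
        ft_features=[],
        lookback_window_seconds=600,
    )
except TypeError as e:
    print(e)  # ... got an unexpected keyword argument 'df_lookup_keys'
```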
@@ -101,33 +98,42 @@ def _spark_asof_join_features_tempo(
     :param lookback_window: maximum lookback time (seconds)
     :return: the joined DataFrame
     """
-
+
     # 1. Keep only the key and timestamp columns
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    df_keys = df.select(lookup_keys + [timestamp_key])
+    feature_keys = feature_df.select(lookup_keys + [timestamp_key])
+
+    # 2. Build the join condition
+    join_cond = [df_keys[k] == feature_keys[k] for k in lookup_keys]
+    join_cond = reduce(lambda x, y: x & y, join_cond)
+    join_cond &= (df_keys[timestamp_key] >= feature_keys[timestamp_key])
+
+    if lookback_window:
+        join_cond &= (
+            (F.unix_timestamp(df_keys[timestamp_key]) -
+             F.unix_timestamp(feature_keys[timestamp_key])) <= lookback_window
+        )
+
+    # 3. Join, then find the latest feature record for each primary-table row
+    joined = df_keys.join(feature_keys, join_cond, "left")
+
+    # Group by the primary-table keys and pick the max feature timestamp
+    window = Window.partitionBy(lookup_keys).orderBy(F.desc(timestamp_key))
+    latest_features = (
+        joined
+        .withColumn("rn", F.row_number().over(window))
+        .filter(F.col("rn") == 1)
+        .drop("rn")
+    )
+
+    # 4. Final join to fetch the full feature data
+    result = df.join(
+        latest_features.select(lookup_keys + [timestamp_key, "feature_col"]),
+        lookup_keys + [timestamp_key],
+        "left"
+    )
 
+    return result
 
 def _spark_asof_join_features_native(
     labels_df: DataFrame,
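The rebuilt tempo path is a standard as-of join: left-join feature rows with `feature_ts <= label_ts` (optionally bounded by the lookback window), then keep the newest matching feature row per label row with `row_number()` over a descending-timestamp window. A self-contained demo of that core pattern, with toy data and column names of my choosing (the package's hardcoded `"feature_col"` selection is mirrored here):

```python
from pyspark.sql import SparkSession, Window
from pyspark.sql import functions as F

spark = SparkSession.builder.master("local[1]").getOrCreate()

labels = spark.createDataFrame(
    [("u1", "2024-01-03"), ("u1", "2024-01-10")], ["uid", "ts"])
features = spark.createDataFrame(
    [("u1", "2024-01-01", 0.1), ("u1", "2024-01-05", 0.9)],
    ["f_uid", "f_ts", "feature_col"])

# As-of condition: same key, feature timestamp at or before the label timestamp.
cond = (labels["uid"] == features["f_uid"]) & (labels["ts"] >= features["f_ts"])
joined = labels.join(features, cond, "left")

# Keep only the newest matching feature row per (uid, ts) label row.
w = Window.partitionBy("uid", "ts").orderBy(F.desc("f_ts"))
asof = (joined.withColumn("rn", F.row_number().over(w))
              .filter(F.col("rn") == 1)
              .drop("rn", "f_uid", "f_ts"))
asof.show()
# 2024-01-03 pairs with 0.1; 2024-01-10 pairs with 0.9 (latest prior feature).
```

Two fragile points in the package version, if this path is ever exercised: after the self-join, both sides still carry a column named `timestamp_key`, so ordering the window by that name may be ambiguous, and the final select assumes a literal `"feature_col"` column exists in the feature table.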
@@ -184,8 +190,8 @@ def _spark_asof_join_features_native(
         )
         if lookback_window_seconds is not None:
             join_conditions &= (
-
-                -
+                unix_timestamp(labels_df_keys_only[timestamp_key])
+                - unix_timestamp(features_df_keys_only["__features_tk"])
             ) <= lookback_window_seconds
 
         # Join labels and features DataFrames
wedata/feature_store/utils/uc_utils.py
@@ -287,7 +287,7 @@ def utc_timestamp_ms_from_iso_datetime_string(date_string: str) -> int:
     # The Feature Store backend returns timestamps in milliseconds, so this allows for direct comparisons.
     dt = datetime.fromisoformat(date_string)
     utc_dt = dt.replace(tzinfo=timezone.utc)
-    return
+    return 1000 * utc_dt.timestamp()
 
 def get_unique_list_order(elements: List[Any]) -> List[Any]:
     """
tencent_wedata_feature_engineering_dev-0.1.49.dist-info/RECORD
@@ -1,66 +0,0 @@
-wedata/__init__.py,sha256=GYxqkkgH0oH4QtNiOCZHuGkc0sSH1LgEqmhSX6sB4So,200
-wedata/feature_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/client.py,sha256=B6fy-PGJZsEHGPE8tDmrTolyioJ_-Po2DK3p_HCW7Sw,19552
-wedata/feature_store/mlflow_model.py,sha256=OCUuccOoO0NXWSzIPoGeL03Ha1Q3aQTJW2RlJrTCmzc,554
-wedata/feature_store/cloud_sdk_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/cloud_sdk_client/client.py,sha256=B7nCQ_MvbLP4ieT7rsa32FHws2fOG4VQZT5lmQ3Cvzk,4914
-wedata/feature_store/cloud_sdk_client/models.py,sha256=7_QUq0kZcrcclRMsIYFoqBrlzVwaHoVY-yU5SHIrJWM,19789
-wedata/feature_store/cloud_sdk_client/utils.py,sha256=xwvXJpk2RXbJtgOaXCZQbGRrlzcTRzv27yQFxKp_X84,970
-wedata/feature_store/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/common/log/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/common/log/logger.py,sha256=c45DlIbIuwRP2na3ZXsncpHV5KUltqfyKzIgG9GG3g4,1151
-wedata/feature_store/common/protos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/common/protos/feature_store_pb2.py,sha256=oMIUGGeGNP84g_nFqOQwTXjV1GiU2ehSOy7CyFu2__g,4207
-wedata/feature_store/common/store_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/common/store_config/redis.py,sha256=9R5npM2s1u0o9IakmpbRsFdJC0vNar_uvA62OLWuXBs,1145
-wedata/feature_store/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/constants/constants.py,sha256=KY_EYyVH-rao3-rHUbdoayz7lcsGyiMDI44Ib7KFCKc,1959
-wedata/feature_store/constants/engine_types.py,sha256=42mI-kNDDtoA4_I3iqDe4FkF2M2l_Bt4Q1V6WUB-_k0,921
-wedata/feature_store/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/entities/column_info.py,sha256=-AR6EKHwgoqIkRHFyguxVEtnYt6fvusWHkEjF4kvS0A,5141
-wedata/feature_store/entities/environment_variables.py,sha256=ZEFml5H9MQuzBKM074mUrFYu-Sga4Knmxqiwpke2WGc,1679
-wedata/feature_store/entities/feature.py,sha256=wX8fTBlJq3GYdj9rrBDCY3kFgcVBBAiOOZdxEhnQkNQ,1241
-wedata/feature_store/entities/feature_column_info.py,sha256=ZAS_I-MDg2ofCv3nwYvGCQrrpEljzrh_L1D-gqOV_mM,2407
-wedata/feature_store/entities/feature_function.py,sha256=R17INrCE-U_Uj9KLbFz69aYlOkTETTwQHMMo470F4lQ,1865
-wedata/feature_store/entities/feature_lookup.py,sha256=UYmYCzkQ1_KuooybS3F-7HDcjBMPZ72InL06UTHbEtw,8749
-wedata/feature_store/entities/feature_spec.py,sha256=Z2SXE_LObjNY3q5yBVKPXGTUiMZy7zaI6-ZbAoFlwG8,21769
-wedata/feature_store/entities/feature_spec_constants.py,sha256=YWDBfRiNDe6fUJFUBo3V4WYg2xsljoPAE-ZejfFZCgM,785
-wedata/feature_store/entities/feature_table.py,sha256=nHCCd7WUryROt9oTJpYkT-KiGbKcQd7BEE9L2_1dhYw,4107
-wedata/feature_store/entities/feature_table_info.py,sha256=yJ1P3AYaPiHW6ehCbMWhndzguHJqJKWfeFwYjwTLt2U,1481
-wedata/feature_store/entities/function_info.py,sha256=yDwIzTrBR-ECWubgeoy48SYZfdY7P0JcraZnWGCW0ag,2752
-wedata/feature_store/entities/on_demand_column_info.py,sha256=a44ep-f3FOruWNXl3c8v7733rNuoKXJaHTv1aqF905s,1739
-wedata/feature_store/entities/source_data_column_info.py,sha256=FyBmBPUSvc2S2OPFTvsQf2AdS-KFGkYBmd4yL_Vur8M,702
-wedata/feature_store/entities/training_set.py,sha256=ylt1h6Z_xU8hKYvnvd80CeewTGSN68-_kvFpoliwH7s,5679
-wedata/feature_store/feast_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/feast_client/feast_client.py,sha256=mCv-OiKehfgcOJhJV0wXMRs5d7e2zEBYmVmDguk0rxU,20728
-wedata/feature_store/feature_table_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/feature_table_client/feature_table_client.py,sha256=qyr-jS-nIyphFVZXcUXV_HSfAu-8c19f0b8iG5rYsl8,42669
-wedata/feature_store/spark_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/spark_client/spark_client.py,sha256=aTenEqfZoJYMrph98qjNHZ-H4dgNKnMaH14st8bCVRQ,11797
-wedata/feature_store/training_set_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/training_set_client/training_set_client.py,sha256=Ja_W1SKWKueW6wmwDx-623mfpwKQICm6A-ec_jgOFt4,23707
-wedata/feature_store/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/utils/common_utils.py,sha256=vkpoXxZTd6S-2MgdyTQZ6P_ckdqNSK50ECuVBG2BwfI,12314
-wedata/feature_store/utils/env_utils.py,sha256=NzJjT7WAXfw0Gksad3nSEeUDSAXfsKkC-v7aY6kzrpU,2888
-wedata/feature_store/utils/feature_lookup_utils.py,sha256=mNV6RhBdpv1iTZduCA9YwXwkeJCwU5MFQ1MkFeD9IhY,22003
-wedata/feature_store/utils/feature_spec_utils.py,sha256=j8t-zel2_r8Q9m88BmFKkHMdkGNIduWJB-28OZDASRY,11613
-wedata/feature_store/utils/feature_utils.py,sha256=KKq28bVB_lCuhnR9Hk6JegJBOVgcelWlvrRM-F9onkA,2796
-wedata/feature_store/utils/on_demand_utils.py,sha256=pazZRG5c0Se08MV_inBddIeX4Q9xlVN_H9SC_WK3xzs,4260
-wedata/feature_store/utils/schema_utils.py,sha256=y6EYY1pUxjVg6MP4C7avdW8ZEBBaDo1YTV2CmPF4i8o,4491
-wedata/feature_store/utils/signature_utils.py,sha256=SZFufd19m0jmGnOLmAl3JPKZC-qHq-wQezh6G7HOMfc,7773
-wedata/feature_store/utils/topological_sort.py,sha256=ebzKxmxeCLk9seB1zR0ASCGXsZsa-DjxJeTc4KUadtg,6475
-wedata/feature_store/utils/training_set_utils.py,sha256=MYsPZS1d9HKswHgjgxD8K7H9N3dWPyyTTx20Mkp4PVU,22497
-wedata/feature_store/utils/uc_utils.py,sha256=5jngdLT8quP1lfGHN_SSFQQlcOh_sUB9M1varCgdFwg,11436
-wedata/feature_store/utils/validation_utils.py,sha256=lJe6HCg5v5CZxH_pvT-vpGhCpo66LT2erXraHE2T0iI,2584
-wedata/tempo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/tempo/interpol.py,sha256=3JF8dwcdKv2o10FN45aefgvxR5DjlR6FJAXrbAiGCro,16423
-wedata/tempo/intervals.py,sha256=L2ao7LlgQmfDTFwnBoFLXeuygSvwtIKXL52thiD80Yw,44078
-wedata/tempo/io.py,sha256=KWIn6IBSkuBxr8QCcpdZ6NFX_49-8UQdGftmZgs_ujw,1872
-wedata/tempo/ml.py,sha256=WtGa2szn6PditvZsTZoxo7wFDe4k1SRoMZ-jgNGIjvE,4323
-wedata/tempo/resample.py,sha256=h81RVVmCl4ect-YKE-KZZHPDi1rGI3sh-YIb-Btz0ck,9698
-wedata/tempo/tsdf.py,sha256=S4lZfxhSRFiezYoYS6gvGsl1mZA3zp-MWEKFHYZpDg0,70968
-wedata/tempo/utils.py,sha256=I9I6l2DMwUoY213L04Yc1UR_zTWgSkj1BVo4ZwzQd4Y,7977
-tencent_wedata_feature_engineering_dev-0.1.49.dist-info/METADATA,sha256=bNbO1CncVzLyyM2-pkch4bdSnAe75f8cZTkUKn0z4aE,582
-tencent_wedata_feature_engineering_dev-0.1.49.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-tencent_wedata_feature_engineering_dev-0.1.49.dist-info/top_level.txt,sha256=Xa0v1rh__RvfVTVDirW5r5UBKg7ZO_iuTeXfp8MNo2A,7
-tencent_wedata_feature_engineering_dev-0.1.49.dist-info/RECORD,,
{tencent_wedata_feature_engineering_dev-0.1.49.dist-info → tencent_wedata_feature_engineering_dev-0.2.0.dist-info}/top_level.txt
File without changes
wedata/feature_store/cloud_sdk_client/client.py
@@ -1,108 +0,0 @@
-import json
-
-from tencentcloud.wedata.v20210820.wedata_client import WedataClient
-from tencentcloud.wedata.v20250806.wedata_client import WedataClient as WedataClientV2
-from tencentcloud.common import credential
-from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
-from wedata.feature_store.cloud_sdk_client.utils import get_client_profile, set_request_header, is_mock
-import wedata.feature_store.cloud_sdk_client.models as models
-from wedata.feature_store.common.log.logger import get_logger
-
-
-class FeatureCloudSDK:
-    def __init__(self, secret_id: str, secret_key: str, region: str):
-        self._client = WedataClient(credential.Credential(secret_id, secret_key), region, get_client_profile())
-        self._client_v2 = WedataClientV2(credential.Credential(secret_id, secret_key), region, get_client_profile())
-
-    def CreateOnlineFeatureTable(self, request: models.CreateOnlineFeatureTableRequest) -> 'models.CreateOnlineFeatureTableResponse':
-        """
-        Create an online feature table
-        Args:
-            request: creation request parameters
-
-        Returns:
-            the creation response
-        """
-        logger = get_logger()
-        if is_mock():
-            logger.debug("Mock CreateOnlineFeatureTable API")
-            return models.CreateOnlineFeatureTableResponse()
-        try:
-            params = request._serialize()
-            headers = set_request_header(request.headers)
-            logger.debug(f"CreateOnlineFeatureTable params: {params}")
-            logger.debug(f"CreateOnlineFeatureTable headers: {headers}")
-            self._client._apiVersion = "2021-08-20"
-            body = self._client.call("CreateOnlineFeatureTable", params, headers=headers)
-            response = json.loads(body)
-            model = models.CreateOnlineFeatureTableResponse()
-            model._deserialize(response["Response"])
-            logger.debug(f"CreateOnlineFeatureTable Response: {response}")
-            return model
-        except Exception as e:
-            if isinstance(e, TencentCloudSDKException):
-                raise
-            else:
-                raise TencentCloudSDKException(type(e).__name__, str(e))
-
-    def DescribeNormalSchedulerExecutorGroups(self, request: models.DescribeNormalSchedulerExecutorGroupsRequest) -> 'models.DescribeNormalSchedulerExecutorGroupsResponse':
-        """
-        Query normal scheduler executor groups
-        Args:
-            request: query request parameters
-
-        Returns:
-            the query response
-        """
-        logger = get_logger()
-        if is_mock():
-            logger.debug("Mock DescribeNormalSchedulerExecutorGroups API")
-            return models.DescribeNormalSchedulerExecutorGroupsResponse()
-
-        try:
-            params = request._serialize()
-            headers = set_request_header(request.headers)
-            logger.debug(f"DescribeNormalSchedulerExecutorGroups params: {params}")
-            logger.debug(f"DescribeNormalSchedulerExecutorGroups headers: {headers}")
-            self._client._apiVersion = "2021-08-20"
-            body = self._client.call("DescribeNormalSchedulerExecutorGroups", params, headers=headers)
-            response = json.loads(body)
-            model = models.DescribeNormalSchedulerExecutorGroupsResponse()
-            model._deserialize(response["Response"])
-            logger.debug(f"DescribeNormalSchedulerExecutorGroups Response: {response}")
-            return model
-        except Exception as e:
-            if isinstance(e, TencentCloudSDKException):
-                raise
-            else:
-                raise TencentCloudSDKException(type(e).__name__, str(e))
-
-    def RefreshFeatureTable(self, request: models.RefreshFeatureTableRequest) -> 'models.RefreshFeatureTableResponse':
-        """
-        Refresh a feature table
-        Args:
-            request: refresh request parameters
-        Returns:
-            the refresh response
-        """
-        logger = get_logger()
-        if is_mock():
-            logger.debug("Mock RefreshFeatureTable API")
-            return models.RefreshFeatureTableResponse()
-        try:
-            params = request._serialize()
-            headers = set_request_header(request.headers)
-            logger.debug(f"RefreshFeatureTable params: {params}")
-            logger.debug(f"RefreshFeatureTable headers: {headers}")
-            self._client_v2._apiVersion = "2025-08-06"
-            body = self._client_v2.call("RefreshFeatureTable", params, headers=headers)
-            response = json.loads(body)
-            model = models.RefreshFeatureTableResponse()
-            model._deserialize(response["Response"])
-            logger.debug(f"RefreshFeatureTable Response: {response}")
-            return model
-        except Exception as e:
-            if isinstance(e, TencentCloudSDKException):
-                raise
-            else:
-                raise TencentCloudSDKException(type(e).__name__, str(e))
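The three deleted methods repeat the same serialize/call/deserialize/wrap pattern verbatim. A generic wrapper would remove that triplication; a sketch with hypothetical names, following the call conventions visible in the deleted code rather than any documented SDK API:

```python
import json

def call_wedata_api(client, action, api_version, request, response_cls, logger):
    """Hypothetical helper: serialize the request, invoke the named action,
    and deserialize the 'Response' payload into the given model class."""
    params = request._serialize()
    client._apiVersion = api_version
    body = client.call(action, params)
    model = response_cls()
    model._deserialize(json.loads(body)["Response"])
    logger.debug(f"{action} response: {model}")
    return model
```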