tencent-wedata-feature-engineering-dev 0.1.49__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tencent-wedata-feature-engineering-dev might be problematic.

Files changed (38)
  1. {tencent_wedata_feature_engineering_dev-0.1.49.dist-info → tencent_wedata_feature_engineering_dev-0.2.0.dist-info}/METADATA +10 -8
  2. tencent_wedata_feature_engineering_dev-0.2.0.dist-info/RECORD +46 -0
  3. {tencent_wedata_feature_engineering_dev-0.1.49.dist-info → tencent_wedata_feature_engineering_dev-0.2.0.dist-info}/WHEEL +1 -1
  4. wedata/feature_store/client.py +28 -92
  5. wedata/feature_store/constants/constants.py +2 -5
  6. wedata/feature_store/entities/feature_lookup.py +0 -17
  7. wedata/feature_store/entities/feature_spec.py +2 -2
  8. wedata/feature_store/entities/feature_table.py +1 -5
  9. wedata/feature_store/entities/function_info.py +4 -1
  10. wedata/feature_store/feature_table_client/feature_table_client.py +53 -528
  11. wedata/feature_store/spark_client/spark_client.py +15 -41
  12. wedata/feature_store/training_set_client/training_set_client.py +10 -9
  13. wedata/feature_store/utils/common_utils.py +4 -48
  14. wedata/feature_store/utils/feature_lookup_utils.py +43 -37
  15. wedata/feature_store/utils/feature_spec_utils.py +1 -1
  16. wedata/feature_store/utils/uc_utils.py +1 -1
  17. tencent_wedata_feature_engineering_dev-0.1.49.dist-info/RECORD +0 -66
  18. wedata/feature_store/cloud_sdk_client/__init__.py +0 -0
  19. wedata/feature_store/cloud_sdk_client/client.py +0 -108
  20. wedata/feature_store/cloud_sdk_client/models.py +0 -686
  21. wedata/feature_store/cloud_sdk_client/utils.py +0 -39
  22. wedata/feature_store/common/log/__init__.py +0 -0
  23. wedata/feature_store/common/log/logger.py +0 -40
  24. wedata/feature_store/common/store_config/__init__.py +0 -0
  25. wedata/feature_store/common/store_config/redis.py +0 -48
  26. wedata/feature_store/constants/engine_types.py +0 -34
  27. wedata/feature_store/feast_client/__init__.py +0 -0
  28. wedata/feature_store/feast_client/feast_client.py +0 -487
  29. wedata/feature_store/utils/env_utils.py +0 -108
  30. wedata/tempo/__init__.py +0 -0
  31. wedata/tempo/interpol.py +0 -448
  32. wedata/tempo/intervals.py +0 -1331
  33. wedata/tempo/io.py +0 -61
  34. wedata/tempo/ml.py +0 -129
  35. wedata/tempo/resample.py +0 -318
  36. wedata/tempo/tsdf.py +0 -1720
  37. wedata/tempo/utils.py +0 -254
  38. {tencent_wedata_feature_engineering_dev-0.1.49.dist-info → tencent_wedata_feature_engineering_dev-0.2.0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,6 @@
 from collections import defaultdict
 from typing import Optional, Any, Dict, List
 
-import logging
 from pyspark.sql import SparkSession, DataFrame
 from pyspark.sql.catalog import Column
 from pyspark.sql.functions import when, isnull
@@ -15,7 +14,7 @@ from wedata.feature_store.constants.constants import (
 from wedata.feature_store.entities.feature import Feature
 from wedata.feature_store.entities.feature_table import FeatureTable
 from wedata.feature_store.entities.function_info import FunctionParameterInfo, FunctionInfo
-from wedata.feature_store.utils.common_utils import unsanitize_identifier, check_spark_table_exists, check_package_version
+from wedata.feature_store.utils.common_utils import unsanitize_identifier
 
 
 class SparkClient:
@@ -29,12 +28,10 @@ class SparkClient:
         Returns:
             str: the current catalog name, or None if not set
         """
-        _, ok, _ = check_package_version("pyspark", "3.4.0", ">=")
-        if ok:
+        try:
             return unsanitize_identifier(self._spark.catalog.currentCatalog())
-        else:
-            catalog = self._spark.sql("SELECT current_catalog()").first()[0]
-            return unsanitize_identifier(catalog)
+        except Exception:
+            return None
 
     def get_current_database(self):
         """
@@ -52,9 +49,13 @@ class SparkClient:
             # Catch all exceptions and return None
             return None
 
+
+
+
     def createDataFrame(self, data, schema) -> DataFrame:
         return self._spark.createDataFrame(data, schema)
 
+
     def read_table(self, table_name):
         """Read data from a Spark table
 
@@ -67,28 +68,20 @@ class SparkClient:
         Raises:
             ValueError: raised when the table does not exist or cannot be read
         """
-        table_name = _translate_spark_table_name(table_name)
         try:
             # Verify that the table exists
-            if not check_spark_table_exists(self._spark, table_name):
+            if not self._spark.catalog.tableExists(table_name):
                 raise ValueError(f"Table does not exist: {table_name}")
+
             return self._spark.table(table_name)
 
         except Exception as e:
             raise ValueError(f"Failed to read table {table_name}: {str(e)}")
 
+
     def get_features(self, table_name):
         # Query column information
-        table_name = _translate_spark_table_name(table_name)
-        split = table_name.split(".")
-        if len(split) == 2:
-            # db.table_name
-            columns = self._spark.catalog.listColumns(tableName=split[1], dbName=split[0])
-        elif len(split) == 3:
-            # catalog.db.table_name
-            columns = self._spark.catalog.listColumns(tableName=split[2], dbName=split[1])
-        else:
-            columns = self._spark.catalog.listColumns(tableName=table_name)
+        columns = self._spark.catalog.listColumns(tableName=table_name)
         return [
             Feature(
                 feature_table=table_name,
@@ -100,19 +93,15 @@ class SparkClient:
         ]
 
     def get_feature_table(self, table_name):
-        """
-        DLC supports table_name in the form catalog.schema.table
-        EMR supports table_name in the form schema.table
-        """
+        # Fetch the table metadata
+        table = self._spark.catalog.getTable(table_name)
 
-        table_name = _translate_spark_table_name(table_name)
-        # table = self._spark.catalog.getTable(table_name)
         # Fetch the table properties
         properties = self._spark.sql(f"SHOW TBLPROPERTIES {table_name}").collect()
         primary_key_str = next((row.value for row in properties if row.key == "primaryKeys"), None)
         primary_keys = primary_key_str.split(",") if primary_key_str else []
         table_id = next((row.value for row in properties if row.key == "table_id"), table_name)
-        description = next((row.value for row in properties if row.key == "comment"), None)
+        description = table.description or next((row.value for row in properties if row.key == "comment"), None)
         timestamp_keys_str = next((row.value for row in properties if row.key == "timestampKeys"), None)
         timestamp_keys = timestamp_keys_str.split(",") if timestamp_keys_str else []
         # Fetch the partition column information
@@ -269,18 +258,3 @@ class SparkClient:
             kwargs[_PREBUILT_ENV_URI] = prebuilt_env_uri
 
         return spark_udf(self._spark, model_uri, **kwargs)
-
-
-def _translate_spark_table_name(table_name):
-    from wedata.feature_store.constants.engine_types import judge_engine_type, CalculateEngineTypes
-    # Fetch the table metadata
-    if judge_engine_type() == CalculateEngineTypes.EMR:
-        split_names = table_name.split(".")
-        # print(f"==== EMR TABLE split len({len(split_names)})")
-        if len(split_names) <= 2:
-            return table_name
-        else:
-            table_name = ".".join(table_name.split(".")[1:])
-            return table_name
-    return table_name
-
@@ -309,7 +309,16 @@ class TrainingSetClient:
         feature_spec.save(data_path)
 
         print(f'artifact_path:{artifact_path},data_path:{data_path},conda_env:{conda_env},'
-              f'signature:{signature},input_example:{input_example}');
+              f'signature:{signature},input_example:{input_example}')
+
+        # Set the instance ID tag
+        tdlc_home = os.getenv('TDLC_HOME', '')
+        if tdlc_home:
+            instance_id = tdlc_home.split('/')[-3]
+            mlflow.set_tag("instance_id", instance_id)
+
+            # Set an experiment tag, needed for lineage queries
+            mlflow.set_experiment_tag(f"instance_id_{instance_id}", instance_id)
 
         mlflow.pyfunc.log_model(
             artifact_path=artifact_path,
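
The added block above tags the active MLflow run and experiment with an instance ID derived from TDLC_HOME. In isolation, the tagging calls look like this (a sketch with a made-up instance_id value; the TDLC_HOME path layout is specific to the runtime environment):

    import mlflow

    instance_id = "ins-example123"  # hypothetical value; normally parsed from TDLC_HOME

    with mlflow.start_run():
        # Tag the active run so it can be traced back to the compute instance
        mlflow.set_tag("instance_id", instance_id)
        # Tag the active experiment; per the diff comment this is used for lineage queries
        mlflow.set_experiment_tag(f"instance_id_{instance_id}", instance_id)
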
@@ -360,7 +369,6 @@ class TrainingSetClient:
         env_manager: Optional[str] = None,
         local_uri: Optional[str] = None,
         params: Optional[dict[str, Any]] = None,
-        timestamp_key: str = None,
         **kwargs,
     ) -> DataFrame:
         # TODO: ML - to be confirmed whether this is needed
@@ -424,11 +432,8 @@ class TrainingSetClient:
 
         # Validate that columns needed for joining feature tables exist and are not duplicates.
         feature_input_keys = []
-        print("====>timestamp_key:", timestamp_key)
         for fci in feature_spec.feature_column_infos:
-            print("====>fci:", fci.lookup_key)
             feature_input_keys.extend([k for k in fci.lookup_key])
-
         on_demand_input_names = uc_utils.get_unique_list_order(
             [
                 input_name
@@ -442,16 +447,12 @@ class TrainingSetClient:
         ]
         # print(f"wedata source_data_names:{source_data_names}")
 
-        print("===>source_data_names:", source_data_names)
-
         feature_output_names = [
             fci.output_name for fci in feature_spec.feature_column_infos
         ]
-        print("====>feature_output_names:", feature_output_names)
         on_demand_output_names = [
             odci.output_name for odci in feature_spec.on_demand_column_infos
         ]
-        print("====>on_demand_output_names:", on_demand_output_names)
         all_output_names = set(
             source_data_names + feature_output_names + on_demand_output_names
         )
@@ -2,12 +2,14 @@
 Common utility functions
 """
 import os
+import json
 from collections import Counter
 from datetime import datetime, timezone
 from functools import wraps
 from typing import Any, Dict, List, Optional
 from urllib.parse import urlparse
 
+import mlflow
 from mlflow.exceptions import RestException
 from mlflow.store.artifact.artifact_repository_registry import get_artifact_repository
 from mlflow.store.artifact.models_artifact_repo import ModelsArtifactRepository
@@ -16,7 +18,6 @@ from mlflow.utils import databricks_utils
 
 from wedata.feature_store.constants import constants
 from wedata.feature_store.constants.constants import MODEL_DATA_PATH_ROOT
-from pyspark.sql import SparkSession
 
 import logging
 
@@ -116,7 +117,7 @@ def _get_dbutils():
 def utc_timestamp_ms_from_iso_datetime_string(date_string: str) -> int:
     dt = datetime.fromisoformat(date_string)
     utc_dt = dt.replace(tzinfo=timezone.utc)
-    return int(1000 * utc_dt.timestamp())
+    return 1000 * utc_dt.timestamp()
 
 
 def pip_depependency_pinned_major_version(pip_package_name, major_version):
@@ -303,49 +304,4 @@ def validate_database(database_name):
         database_name = os.environ.get(constants.WEDATA_DEFAULT_FEATURE_STORE_DATABASE)
         if database_name is None:
             raise ValueError("Database_name variable or default database is not set.")
-    return True
-
-
-def check_package_version(package_name, expected_version, op="=="):
-    """
-    Check whether the installed version of the specified package satisfies the expected version requirement.
-    Args:
-        package_name: the package name
-        expected_version: the expected version requirement, e.g. 3.5.5
-        op: comparison operator, defaults to "=="
-    Returns:
-        (whether the package was found, whether the version matches, the installed version)
-        If satisfied, returns (True, True, installed_version); otherwise returns (True, False, installed_version).
-        If the package is not installed, returns (False, False, None).
-    """
-    # Add at the top of the script
-    from packaging import version
-    import importlib.metadata
-    try:
-        installed_version = importlib.metadata.version(package_name)
-
-        if not op:
-            raise ValueError(f"Invalid op: {op}. need be in ['==', '>', '<', '>=', '<=', '!=', '~=']")
-        # Supports version range checks (e.g. ">=2.0,<3.0")
-        # Use packaging.version for complex version checks
-        i = version.parse(installed_version)
-        e = version.parse(expected_version)
-        return True, eval(f"i{op}e"), installed_version
-
-    except importlib.metadata.PackageNotFoundError:
-        return False, False, None
-
-
-def check_spark_table_exists(spark_client: SparkSession, full_table_name: str) -> bool:
-    _, ok, _ = check_package_version("pyspark", "3.5.0", ">=")
-    try:
-        return spark_client.catalog.tableExists(full_table_name)
-    except AttributeError:
-        split = full_table_name.split(".")
-        if len(split) == 2:
-            query = f"SHOW TABLES IN {split[0]} LIKE '{split[1]}'"
-        elif len(split) == 3:
-            query = f"SHOW TABLES IN {split[1]} LIKE '{split[2]}'"
-        else:
-            query = f"SHOW TABLES LIKE '{full_table_name}'"
-        return spark_client.sql(query).count() > 0
+    return True
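
For reference, the removed check_package_version helper compares versions by building a Python expression and passing it to eval. The same check can be written with an explicit operator table; a minimal sketch (illustration only, not part of either wheel):

    import importlib.metadata
    import operator

    from packaging import version

    # Map comparison operators to functions instead of eval-ing a string
    _OPS = {"==": operator.eq, "!=": operator.ne, ">": operator.gt,
            ">=": operator.ge, "<": operator.lt, "<=": operator.le}

    def package_version_satisfies(package_name, expected_version, op=">="):
        """Return (found, matches, installed_version) for an installed distribution."""
        try:
            installed = importlib.metadata.version(package_name)
        except importlib.metadata.PackageNotFoundError:
            return False, False, None
        matches = _OPS[op](version.parse(installed), version.parse(expected_version))
        return True, matches, installed

    # Example: found, ok, ver = package_version_satisfies("pyspark", "3.4.0", ">=")
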
@@ -6,10 +6,9 @@ from collections import defaultdict
 from functools import reduce
 from typing import Dict, List, Optional, Tuple
 
-from pyspark.sql import DataFrame
+from pyspark.sql import DataFrame, Window
 from pyspark.sql import functions as F
-import pyspark.sql.functions as psf
-
+from pyspark.sql.functions import sum, unix_timestamp
 
 from wedata.feature_store.entities.environment_variables import BROADCAST_JOIN_THRESHOLD
 from wedata.feature_store.entities.feature_column_info import FeatureColumnInfo
@@ -17,7 +16,7 @@ from wedata.feature_store.entities.feature_lookup import FeatureLookup
 from wedata.feature_store.entities.feature_spec import FeatureSpec
 from wedata.feature_store.entities.feature_table import FeatureTable
 
-from wedata.feature_store.utils import uc_utils
+from wedata.feature_store.utils import common_utils, validation_utils, uc_utils
 
 _logger = logging.getLogger(__name__)
 
@@ -75,21 +74,19 @@ def _spark_asof_join_features(
     else:
         joined_df = _spark_asof_join_features_tempo(
             df=df,
-            feature_df=feature_and_keys,
-            lookup_keys=df_lookup_keys,
-            timestamp_key=df_timestamp_lookup_key,
-            lookback_window=lookback_window_seconds,
+            df_lookup_keys=df_lookup_keys,
+            df_timestamp_lookup_key=df_timestamp_lookup_key,
+            feature_and_keys=feature_and_keys,
             ft_features=ft_features,
+            lookback_window_seconds=lookback_window_seconds,
         )
     return joined_df
 
-
 def _spark_asof_join_features_tempo(
     df: DataFrame,
     feature_df: DataFrame,
     lookup_keys: List[str],
     timestamp_key: str,
-    ft_features: List[Tuple[str, str]],
     lookback_window: Optional[float] = None
 ) -> DataFrame:
     """
@@ -101,33 +98,42 @@ def _spark_asof_join_features_tempo(
     :param lookback_window: maximum lookback window (seconds)
     :return: the joined DataFrame
     """
-    from wedata.tempo.tsdf import TSDF
+
     # 1. Keep only the key columns and the timestamp column
-    df_tsdf = TSDF(df, ts_col=timestamp_key, partition_cols=lookup_keys)
-    ft_tsdf = TSDF(feature_df, ts_col=timestamp_key, partition_cols=lookup_keys)
-    # Perform the as-of join
-    joined_df = df_tsdf.asofJoin(
-        ft_tsdf,
-        left_prefix="left",
-        right_prefix="right",
-        skipNulls=False,
-        tolerance=lookback_window
-        if lookback_window is not None
-        else None,
-    ).df
-
-    # Strip the prefixes and restore the original column names
-    left_aliases = [
-        joined_df[f"left_{column_name}"].alias(column_name)
-        for column_name in df.columns
-        if column_name not in lookup_keys
-    ]
-    right_aliases = [
-        joined_df[f"right_{output_name}"].alias(output_name)
-        for (_, output_name) in ft_features
-    ]
-    return joined_df.select(lookup_keys + left_aliases + right_aliases)
+    df_keys = df.select(lookup_keys + [timestamp_key])
+    feature_keys = feature_df.select(lookup_keys + [timestamp_key])
+
+    # 2. Build the join condition
+    join_cond = [df_keys[k] == feature_keys[k] for k in lookup_keys]
+    join_cond = reduce(lambda x, y: x & y, join_cond)
+    join_cond &= (df_keys[timestamp_key] >= feature_keys[timestamp_key])
+
+    if lookback_window:
+        join_cond &= (
+            (F.unix_timestamp(df_keys[timestamp_key]) -
+             F.unix_timestamp(feature_keys[timestamp_key])) <= lookback_window
+        )
+
+    # 3. Perform the join and find the latest feature record for each primary-table row
+    joined = df_keys.join(feature_keys, join_cond, "left")
+
+    # Group by the primary-table keys and take the latest feature timestamp
+    window = Window.partitionBy(lookup_keys).orderBy(F.desc(timestamp_key))
+    latest_features = (
+        joined
+        .withColumn("rn", F.row_number().over(window))
+        .filter(F.col("rn") == 1)
+        .drop("rn")
+    )
+
+    # 4. Final join to fetch the full feature data
+    result = df.join(
+        latest_features.select(lookup_keys + [timestamp_key, "feature_col"]),
+        lookup_keys + [timestamp_key],
+        "left"
+    )
 
+    return result
 
 def _spark_asof_join_features_native(
     labels_df: DataFrame,
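
The hunk above replaces the tempo TSDF.asofJoin with a hand-rolled point-in-time join built from a conditional join plus a window function. As a generic illustration of that pattern (a self-contained sketch with assumed key and timestamp column names, not the package's implementation):

    from typing import List, Optional

    from pyspark.sql import DataFrame, Window
    from pyspark.sql import functions as F

    def asof_join(labels: DataFrame, features: DataFrame, keys: List[str],
                  ts: str, lookback_seconds: Optional[int] = None) -> DataFrame:
        """Attach, to each label row, the latest feature row with feature ts <= label ts."""
        # Rename the feature-side join columns to avoid ambiguity after the join
        feat = features
        for c in keys + [ts]:
            feat = feat.withColumnRenamed(c, f"__ft_{c}")

        cond = labels[ts] >= feat[f"__ft_{ts}"]
        for k in keys:
            cond = cond & (labels[k] == feat[f"__ft_{k}"])
        if lookback_seconds is not None:
            cond = cond & (
                (F.unix_timestamp(labels[ts]) - F.unix_timestamp(feat[f"__ft_{ts}"])) <= lookback_seconds
            )

        joined = labels.join(feat, cond, "left")

        # Keep only the most recent qualifying feature row per label row
        # (assumes label rows are distinct across the partitioning columns)
        w = Window.partitionBy([labels[c] for c in labels.columns]).orderBy(F.desc(f"__ft_{ts}"))
        return (joined.withColumn("__rn", F.row_number().over(w))
                      .filter(F.col("__rn") == 1)
                      .drop("__rn", *[f"__ft_{c}" for c in keys + [ts]]))
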
@@ -184,8 +190,8 @@ def _spark_asof_join_features_native(
     )
     if lookback_window_seconds is not None:
         join_conditions &= (
-            psf.unix_timestamp(labels_df_keys_only[timestamp_key])
-            - psf.unix_timestamp(features_df_keys_only["__features_tk"])
+            unix_timestamp(labels_df_keys_only[timestamp_key])
+            - unix_timestamp(features_df_keys_only["__features_tk"])
         ) <= lookback_window_seconds
 
     # Join labels and features DataFrames
@@ -1,7 +1,7 @@
 import logging
 from dataclasses import dataclass
 from functools import reduce
-from typing import Dict, List, Tuple, Union
+from typing import Dict, List, Set, Tuple, Type, Union
 
 import yaml
 from mlflow.utils.file_utils import YamlSafeDumper
@@ -287,7 +287,7 @@ def utc_timestamp_ms_from_iso_datetime_string(date_string: str) -> int:
     # The Feature Store backend returns timestamps in milliseconds, so this allows for direct comparisons.
     dt = datetime.fromisoformat(date_string)
     utc_dt = dt.replace(tzinfo=timezone.utc)
-    return int(1000 * utc_dt.timestamp())
+    return 1000 * utc_dt.timestamp()
 
 def get_unique_list_order(elements: List[Any]) -> List[Any]:
     """
@@ -1,66 +0,0 @@
-wedata/__init__.py,sha256=GYxqkkgH0oH4QtNiOCZHuGkc0sSH1LgEqmhSX6sB4So,200
-wedata/feature_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/client.py,sha256=B6fy-PGJZsEHGPE8tDmrTolyioJ_-Po2DK3p_HCW7Sw,19552
-wedata/feature_store/mlflow_model.py,sha256=OCUuccOoO0NXWSzIPoGeL03Ha1Q3aQTJW2RlJrTCmzc,554
-wedata/feature_store/cloud_sdk_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/cloud_sdk_client/client.py,sha256=B7nCQ_MvbLP4ieT7rsa32FHws2fOG4VQZT5lmQ3Cvzk,4914
-wedata/feature_store/cloud_sdk_client/models.py,sha256=7_QUq0kZcrcclRMsIYFoqBrlzVwaHoVY-yU5SHIrJWM,19789
-wedata/feature_store/cloud_sdk_client/utils.py,sha256=xwvXJpk2RXbJtgOaXCZQbGRrlzcTRzv27yQFxKp_X84,970
-wedata/feature_store/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/common/log/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/common/log/logger.py,sha256=c45DlIbIuwRP2na3ZXsncpHV5KUltqfyKzIgG9GG3g4,1151
-wedata/feature_store/common/protos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/common/protos/feature_store_pb2.py,sha256=oMIUGGeGNP84g_nFqOQwTXjV1GiU2ehSOy7CyFu2__g,4207
-wedata/feature_store/common/store_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/common/store_config/redis.py,sha256=9R5npM2s1u0o9IakmpbRsFdJC0vNar_uvA62OLWuXBs,1145
-wedata/feature_store/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/constants/constants.py,sha256=KY_EYyVH-rao3-rHUbdoayz7lcsGyiMDI44Ib7KFCKc,1959
-wedata/feature_store/constants/engine_types.py,sha256=42mI-kNDDtoA4_I3iqDe4FkF2M2l_Bt4Q1V6WUB-_k0,921
-wedata/feature_store/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/entities/column_info.py,sha256=-AR6EKHwgoqIkRHFyguxVEtnYt6fvusWHkEjF4kvS0A,5141
-wedata/feature_store/entities/environment_variables.py,sha256=ZEFml5H9MQuzBKM074mUrFYu-Sga4Knmxqiwpke2WGc,1679
-wedata/feature_store/entities/feature.py,sha256=wX8fTBlJq3GYdj9rrBDCY3kFgcVBBAiOOZdxEhnQkNQ,1241
-wedata/feature_store/entities/feature_column_info.py,sha256=ZAS_I-MDg2ofCv3nwYvGCQrrpEljzrh_L1D-gqOV_mM,2407
-wedata/feature_store/entities/feature_function.py,sha256=R17INrCE-U_Uj9KLbFz69aYlOkTETTwQHMMo470F4lQ,1865
-wedata/feature_store/entities/feature_lookup.py,sha256=UYmYCzkQ1_KuooybS3F-7HDcjBMPZ72InL06UTHbEtw,8749
-wedata/feature_store/entities/feature_spec.py,sha256=Z2SXE_LObjNY3q5yBVKPXGTUiMZy7zaI6-ZbAoFlwG8,21769
-wedata/feature_store/entities/feature_spec_constants.py,sha256=YWDBfRiNDe6fUJFUBo3V4WYg2xsljoPAE-ZejfFZCgM,785
-wedata/feature_store/entities/feature_table.py,sha256=nHCCd7WUryROt9oTJpYkT-KiGbKcQd7BEE9L2_1dhYw,4107
-wedata/feature_store/entities/feature_table_info.py,sha256=yJ1P3AYaPiHW6ehCbMWhndzguHJqJKWfeFwYjwTLt2U,1481
-wedata/feature_store/entities/function_info.py,sha256=yDwIzTrBR-ECWubgeoy48SYZfdY7P0JcraZnWGCW0ag,2752
-wedata/feature_store/entities/on_demand_column_info.py,sha256=a44ep-f3FOruWNXl3c8v7733rNuoKXJaHTv1aqF905s,1739
-wedata/feature_store/entities/source_data_column_info.py,sha256=FyBmBPUSvc2S2OPFTvsQf2AdS-KFGkYBmd4yL_Vur8M,702
-wedata/feature_store/entities/training_set.py,sha256=ylt1h6Z_xU8hKYvnvd80CeewTGSN68-_kvFpoliwH7s,5679
-wedata/feature_store/feast_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/feast_client/feast_client.py,sha256=mCv-OiKehfgcOJhJV0wXMRs5d7e2zEBYmVmDguk0rxU,20728
-wedata/feature_store/feature_table_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/feature_table_client/feature_table_client.py,sha256=qyr-jS-nIyphFVZXcUXV_HSfAu-8c19f0b8iG5rYsl8,42669
-wedata/feature_store/spark_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/spark_client/spark_client.py,sha256=aTenEqfZoJYMrph98qjNHZ-H4dgNKnMaH14st8bCVRQ,11797
-wedata/feature_store/training_set_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/training_set_client/training_set_client.py,sha256=Ja_W1SKWKueW6wmwDx-623mfpwKQICm6A-ec_jgOFt4,23707
-wedata/feature_store/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/feature_store/utils/common_utils.py,sha256=vkpoXxZTd6S-2MgdyTQZ6P_ckdqNSK50ECuVBG2BwfI,12314
-wedata/feature_store/utils/env_utils.py,sha256=NzJjT7WAXfw0Gksad3nSEeUDSAXfsKkC-v7aY6kzrpU,2888
-wedata/feature_store/utils/feature_lookup_utils.py,sha256=mNV6RhBdpv1iTZduCA9YwXwkeJCwU5MFQ1MkFeD9IhY,22003
-wedata/feature_store/utils/feature_spec_utils.py,sha256=j8t-zel2_r8Q9m88BmFKkHMdkGNIduWJB-28OZDASRY,11613
-wedata/feature_store/utils/feature_utils.py,sha256=KKq28bVB_lCuhnR9Hk6JegJBOVgcelWlvrRM-F9onkA,2796
-wedata/feature_store/utils/on_demand_utils.py,sha256=pazZRG5c0Se08MV_inBddIeX4Q9xlVN_H9SC_WK3xzs,4260
-wedata/feature_store/utils/schema_utils.py,sha256=y6EYY1pUxjVg6MP4C7avdW8ZEBBaDo1YTV2CmPF4i8o,4491
-wedata/feature_store/utils/signature_utils.py,sha256=SZFufd19m0jmGnOLmAl3JPKZC-qHq-wQezh6G7HOMfc,7773
-wedata/feature_store/utils/topological_sort.py,sha256=ebzKxmxeCLk9seB1zR0ASCGXsZsa-DjxJeTc4KUadtg,6475
-wedata/feature_store/utils/training_set_utils.py,sha256=MYsPZS1d9HKswHgjgxD8K7H9N3dWPyyTTx20Mkp4PVU,22497
-wedata/feature_store/utils/uc_utils.py,sha256=5jngdLT8quP1lfGHN_SSFQQlcOh_sUB9M1varCgdFwg,11436
-wedata/feature_store/utils/validation_utils.py,sha256=lJe6HCg5v5CZxH_pvT-vpGhCpo66LT2erXraHE2T0iI,2584
-wedata/tempo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wedata/tempo/interpol.py,sha256=3JF8dwcdKv2o10FN45aefgvxR5DjlR6FJAXrbAiGCro,16423
-wedata/tempo/intervals.py,sha256=L2ao7LlgQmfDTFwnBoFLXeuygSvwtIKXL52thiD80Yw,44078
-wedata/tempo/io.py,sha256=KWIn6IBSkuBxr8QCcpdZ6NFX_49-8UQdGftmZgs_ujw,1872
-wedata/tempo/ml.py,sha256=WtGa2szn6PditvZsTZoxo7wFDe4k1SRoMZ-jgNGIjvE,4323
-wedata/tempo/resample.py,sha256=h81RVVmCl4ect-YKE-KZZHPDi1rGI3sh-YIb-Btz0ck,9698
-wedata/tempo/tsdf.py,sha256=S4lZfxhSRFiezYoYS6gvGsl1mZA3zp-MWEKFHYZpDg0,70968
-wedata/tempo/utils.py,sha256=I9I6l2DMwUoY213L04Yc1UR_zTWgSkj1BVo4ZwzQd4Y,7977
-tencent_wedata_feature_engineering_dev-0.1.49.dist-info/METADATA,sha256=bNbO1CncVzLyyM2-pkch4bdSnAe75f8cZTkUKn0z4aE,582
-tencent_wedata_feature_engineering_dev-0.1.49.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-tencent_wedata_feature_engineering_dev-0.1.49.dist-info/top_level.txt,sha256=Xa0v1rh__RvfVTVDirW5r5UBKg7ZO_iuTeXfp8MNo2A,7
-tencent_wedata_feature_engineering_dev-0.1.49.dist-info/RECORD,,
@@ -1,108 +0,0 @@
-import json
-
-from tencentcloud.wedata.v20210820.wedata_client import WedataClient
-from tencentcloud.wedata.v20250806.wedata_client import WedataClient as WedataClientV2
-from tencentcloud.common import credential
-from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
-from wedata.feature_store.cloud_sdk_client.utils import get_client_profile, set_request_header, is_mock
-import wedata.feature_store.cloud_sdk_client.models as models
-from wedata.feature_store.common.log.logger import get_logger
-
-
-class FeatureCloudSDK:
-    def __init__(self, secret_id: str, secret_key: str, region: str):
-        self._client = WedataClient(credential.Credential(secret_id, secret_key), region, get_client_profile())
-        self._client_v2 = WedataClientV2(credential.Credential(secret_id, secret_key), region, get_client_profile())
-
-    def CreateOnlineFeatureTable(self, request: models.CreateOnlineFeatureTableRequest) -> 'models.CreateOnlineFeatureTableResponse':
-        """
-        Create an online feature table
-        Args:
-            request: the creation request parameters
-
-        Returns:
-            The creation response
-        """
-        logger = get_logger()
-        if is_mock():
-            logger.debug("Mock CreateOnlineFeatureTable API")
-            return models.CreateOnlineFeatureTableResponse()
-        try:
-            params = request._serialize()
-            headers = set_request_header(request.headers)
-            logger.debug(f"CreateOnlineFeatureTable params: {params}")
-            logger.debug(f"CreateOnlineFeatureTable headers: {headers}")
-            self._client._apiVersion = "2021-08-20"
-            body = self._client.call("CreateOnlineFeatureTable", params, headers=headers)
-            response = json.loads(body)
-            model = models.CreateOnlineFeatureTableResponse()
-            model._deserialize(response["Response"])
-            logger.debug(f"CreateOnlineFeatureTable Response: {response}")
-            return model
-        except Exception as e:
-            if isinstance(e, TencentCloudSDKException):
-                raise
-            else:
-                raise TencentCloudSDKException(type(e).__name__, str(e))
-
-    def DescribeNormalSchedulerExecutorGroups(self, request: models.DescribeNormalSchedulerExecutorGroupsRequest) -> 'models.DescribeNormalSchedulerExecutorGroupsResponse':
-        """
-        Query normal scheduler executor groups
-        Args:
-            request: the query request parameters
-
-        Returns:
-            The query response
-        """
-        logger = get_logger()
-        if is_mock():
-            logger.debug("Mock DescribeNormalSchedulerExecutorGroups API")
-            return models.DescribeNormalSchedulerExecutorGroupsResponse()
-
-        try:
-            params = request._serialize()
-            headers = set_request_header(request.headers)
-            logger.debug(f"DescribeNormalSchedulerExecutorGroups params: {params}")
-            logger.debug(f"DescribeNormalSchedulerExecutorGroups headers: {headers}")
-            self._client._apiVersion = "2021-08-20"
-            body = self._client.call("DescribeNormalSchedulerExecutorGroups", params, headers=headers)
-            response = json.loads(body)
-            model = models.DescribeNormalSchedulerExecutorGroupsResponse()
-            model._deserialize(response["Response"])
-            logger.debug(f"DescribeNormalSchedulerExecutorGroups Response: {response}")
-            return model
-        except Exception as e:
-            if isinstance(e, TencentCloudSDKException):
-                raise
-            else:
-                raise TencentCloudSDKException(type(e).__name__, str(e))
-
-    def RefreshFeatureTable(self, request: models.RefreshFeatureTableRequest) -> 'models.RefreshFeatureTableResponse':
-        """
-        Refresh a feature table
-        Args:
-            request: the refresh request parameters
-        Returns:
-            The refresh response
-        """
-        logger = get_logger()
-        if is_mock():
-            logger.debug("Mock RefreshFeatureTable API")
-            return models.RefreshFeatureTableResponse()
-        try:
-            params = request._serialize()
-            headers = set_request_header(request.headers)
-            logger.debug(f"RefreshFeatureTable params: {params}")
-            logger.debug(f"RefreshFeatureTable headers: {headers}")
-            self._client_v2._apiVersion = "2025-08-06"
-            body = self._client_v2.call("RefreshFeatureTable", params, headers=headers)
-            response = json.loads(body)
-            model = models.RefreshFeatureTableResponse()
-            model._deserialize(response["Response"])
-            logger.debug(f"RefreshFeatureTable Response: {response}")
-            return model
-        except Exception as e:
-            if isinstance(e, TencentCloudSDKException):
-                raise
-            else:
-                raise TencentCloudSDKException(type(e).__name__, str(e))