wedata-pre-code 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ """WeData Pre-Code Library
2
+
3
+ WeData平台的预执行代码库,为机器学习实验提供与MLflow的深度集成和WeData平台的功能增强。
4
+ """
5
+
6
+ from .client import PreCodeClient
7
+
8
+ __all__ = ["PreCodeClient"]
9
+
10
+ __version__ = "1.0.8"
@@ -0,0 +1,37 @@
1
+ __doc__ = """
2
+ Wedata 预执行代码客户端
3
+ 如果要初始化Wedata2的预执行代码客户端,请使用init_wedata2_pre_code方法
4
+ 如果要初始化Wedata3的预执行代码客户端,请使用init_wedata3_pre_code方法
5
+ """
6
+
7
+
8
class PreCodeClient:
    """Entry point that builds the version-specific WeData pre-code client.

    Use :meth:`init_wedata2_pre_code` for WeData 2 and
    :meth:`init_wedata3_pre_code` for WeData 3.  The version-specific client
    modules are imported lazily inside each method.
    """

    def init_wedata2_pre_code(self, **kwargs):
        """Create an initialised WeData 2 pre-code client.

        ``Wedata2PreCodeClient.__init__`` already calls ``init()`` on the new
        instance, so it is not called again here: a second call would wrap
        every patched MLflow method a second time.

        :param kwargs: Field values forwarded to ``Wedata2PreCodeClient``.
        :return: The ``Wedata2PreCodeClient`` instance.
        """
        from wedata_pre_code.wedata2.client import Wedata2PreCodeClient

        return Wedata2PreCodeClient(**kwargs)

    def init_wedata3_pre_code(self, **kwargs):
        """Create and initialise a WeData 3 pre-code client.

        :param kwargs: Field values forwarded to ``Wedata3PreCodeClient``.
        :return: The ``Wedata3PreCodeClient`` instance, after ``init()`` ran.
        """
        from wedata_pre_code.wedata3.client import Wedata3PreCodeClient

        client = Wedata3PreCodeClient(**kwargs)
        client.init()
        return client
File without changes
@@ -0,0 +1,16 @@
1
+ __doc__ = """
2
+ 基础客户端类
3
+ """
4
+
5
+ from abc import abstractmethod
6
+ from pydantic import BaseModel
7
+
8
+
9
class BaseClient(BaseModel):
    """Common base class of the pre-code clients.

    Concrete clients declare their settings as model fields and implement
    :meth:`init`.
    """

    @abstractmethod
    def init(self):
        """Perform the client's setup; concrete clients override this."""
        return None
File without changes
@@ -0,0 +1,433 @@
1
+ from typing import Optional
2
+ from wedata_pre_code.common.base_client import BaseClient
3
+
4
+
5
class Wedata2PreCodeClient(BaseClient):
    """Pre-execution code client for WeData 2.

    Constructing the client runs :meth:`init`, which exports the WeData
    environment variables and monkey-patches MLflow so that experiments, runs
    and models are tagged with, and restricted to, the current project.

    Required fields:
        - wedata_project_id: project ID
        - wedata_notebook_engine: notebook engine
        - qcloud_uin: Tencent Cloud uin
        - qcloud_subuin: Tencent Cloud sub-uin
        - wedata_default_feature_store_database: default feature store database
        - wedata_feature_store_databases: feature store databases
        - qcloud_region: region
        - mlflow_tracking_uri: MLflow tracking URI
        - kernel_region: region used to build the console URLs
    Optional fields:
        - kernel_task_name: task name
        - kernel_task_id: task ID
        - kernel_submit_form_workflow: workflow the task was submitted from
        - kernel_is_international: whether the international site is used
    """

    wedata_project_id: str
    wedata_notebook_engine: str
    qcloud_uin: str
    qcloud_subuin: str
    wedata_default_feature_store_database: str
    wedata_feature_store_databases: str
    qcloud_region: str
    mlflow_tracking_uri: str
    kernel_region: str
    kernel_task_name: Optional[str] = ""
    kernel_task_id: Optional[str] = ""
    kernel_submit_form_workflow: Optional[str] = ""
    kernel_is_international: Optional[bool] = False

    # Private (non-field) flag: set once init() has patched MLflow for this
    # instance, so a repeated init() call does not stack the wrappers again.
    _initialized: bool = False

    def __init__(self, *args, **kwargs):
        """Validate the fields, then immediately run :meth:`init`."""
        super().__init__(*args, **kwargs)
        self.init()

    def init(self):
        """Export the environment and patch MLflow for the current project.

        The method is a no-op when it already ran on this instance.
        NOTE(review): the patches are process-wide and the console URLs are
        captured from the first client that patches; a second client with a
        different project still adds another layer of wrappers.
        """
        if self._initialized:
            return

        import inspect
        import json
        import os
        from functools import wraps

        from mlflow.models.model import Model
        from mlflow.tracking import MlflowClient
        from mlflow.tracking._tracking_service.client import TrackingServiceClient

        self._initialized = True

        allowed_types = (
            "MACHINE_LEARNING",
            "AUTOML_REGRESSION",
            "AUTOML_CLASSIFICATION",
            "AUTOML_PREDICTION",
        )
        # Methods whose ``key`` argument must not touch the protected tag.
        # NOTE(review): "update_tag" and "delete_tags" match none of the
        # methods patched below; confirm whether set_tag/delete_tag were meant.
        protected_tag_methods = (
            "update_tag",
            "delete_tags",
            "set_registered_model_tag",
            "delete_registered_model_tag",
            "delete_model_version_tag",
            "set_experiment_tag",
        )

        project_id = self.wedata_project_id
        user_name = self.qcloud_uin
        # Environment values must be strings, so a None workflow becomes "".
        workflow_id = self.kernel_submit_form_workflow or ""

        os.environ.update(
            {
                "WEDATA_PROJECT_ID": project_id,
                "WEDATA_NOTEBOOK_ENGINE": self.wedata_notebook_engine,
                "QCLOUD_UIN": self.qcloud_uin,
                "QCLOUD_SUBUIN": self.qcloud_subuin,
                "WEDATA_DEFAULT_FEATURE_STORE_DATABASE": (
                    self.wedata_default_feature_store_database
                ),
                "WEDATA_FEATURE_STORE_DATABASES": self.wedata_feature_store_databases,
                "QCLOUD_REGION": self.qcloud_region,
                "MLFLOW_TRACKING_URI": self.mlflow_tracking_uri,
                "KERNEL_SUBMIT_FORM_WORKFLOW": workflow_id,
            }
        )

        # International site and domestic site use different console domains.
        if self.kernel_is_international:
            url_template = "https://{region}.wedata.tencentcloud.com"
        else:
            url_template = "https://{region}.wedata.cloud.tencent.com"
        base_url = url_template.format(region=self.kernel_region)

        os.environ["MLFLOW_RUN_CONTEXT"] = json.dumps(
            {
                "mlflow.source.name": self.kernel_task_name,
                "mlflow.user": user_name,
                "wedata.taskId": self.kernel_task_id,
                "wedata.workflowId": workflow_id,
                "wedata.datascience.type": "MACHINE_LEARNING",
                "wedata.project": project_id,
            },
            indent=None,
        )

        def bound_value(signature, name, args, kwargs):
            """Return the value the caller passed for ``name``, else None."""
            try:
                return signature.bind_partial(*args, **kwargs).arguments.get(name)
            except TypeError:
                return None

        def patch(owner, decorator, method_names):
            """Replace each named attribute of ``owner`` by its wrapped form."""
            for method_name in method_names:
                setattr(owner, method_name, decorator(getattr(owner, method_name)))

        def log_after_terminated(func):
            """Print the console links of a run after it is terminated."""

            @wraps(func)
            def wrapper(tracking_client, run_id, *args, **kwargs):
                print("wedata log_after_terminated wrapper")
                result = func(tracking_client, run_id, *args, **kwargs)
                run_info = tracking_client.store.get_run(run_id).info
                experiment_url = (
                    f"{base_url}/datascience/experiments-single/"
                    f"{run_info.experiment_id}?ProjectId={project_id}"
                )
                run_url = (
                    f"{base_url}/datascience/experiments/task-detail-learn/"
                    f"{run_id}?ProjectId={project_id}"
                )
                print(f"View run {run_info.run_name} at :{run_url}")
                print(f"View experiment at:{experiment_url}")
                return result

            return wrapper

        def inject_model_version_tag(func):
            """Tag the model version registered by a ``Model.log`` call."""
            signature = inspect.signature(func)

            @wraps(func)
            def wrapper(*args, **kwargs):
                print("wedata inject_model_version_tag wrapper")
                registered_model_name = kwargs.get("registered_model_name")
                if registered_model_name is None:
                    # Binding against the real signature finds the argument
                    # whether or not the callable is already bound.
                    registered_model_name = bound_value(
                        signature, "registered_model_name", args, kwargs
                    )
                result = func(*args, **kwargs)
                model_version = getattr(result, "registered_model_version", None)
                if registered_model_name and model_version:
                    client = MlflowClient()
                    for tag_key, tag_value in (
                        ("mlflow.user", user_name),
                        ("wedata.project", project_id),
                        ("wedata.datascience.type", "MACHINE_LEARNING"),
                    ):
                        client.set_model_version_tag(
                            registered_model_name, model_version, tag_key, tag_value
                        )
                return result

            return wrapper

        def inject_project_filter(func):
            """AND the project tag condition into the ``filter_string``."""
            signature = inspect.signature(func)

            @wraps(func)
            def wrapper(*args, **kwargs):
                project = os.getenv("WEDATA_PROJECT_ID")
                if not project:
                    return func(*args, **kwargs)
                try:
                    bound = signature.bind(*args, **kwargs)
                except TypeError:
                    # Invalid call: let the real method report the error.
                    return func(*args, **kwargs)
                project_filter = f"tags.wedata.project = '{project}'"
                caller_filter = bound.arguments.get("filter_string")
                if caller_filter:
                    project_filter = f"({caller_filter}) and ({project_filter})"
                # Writing through the bound arguments keeps a positionally
                # passed filter from being supplied twice.
                bound.arguments["filter_string"] = project_filter
                return func(*bound.args, **bound.kwargs)

            return wrapper

        def inject_project_tag(func):
            """Add the WeData tags to the ``tags`` of a create_* call."""
            signature = inspect.signature(func)

            @wraps(func)
            def wrapper(client, *args, **kwargs):
                project = os.getenv("WEDATA_PROJECT_ID")
                if not project:
                    return func(client, *args, **kwargs)
                try:
                    bound = signature.bind(client, *args, **kwargs)
                except TypeError:
                    return func(client, *args, **kwargs)
                tags = dict(bound.arguments.get("tags") or {})
                tags["wedata.project"] = project
                tags["wedata.datascience.type"] = "MACHINE_LEARNING"
                if "tags" in kwargs:
                    # Same tag set as before for keyword-passed tags.
                    tags["wedata.workflowId"] = os.getenv(
                        "KERNEL_SUBMIT_FORM_WORKFLOW"
                    )
                else:
                    tags["mlflow.user"] = user_name
                bound.arguments["tags"] = tags
                return func(*bound.args, **bound.kwargs)

            return wrapper

        def validate_wedata_tag(func):
            """Hide fetched objects that do not belong to this project."""
            method_name = func.__name__

            @wraps(func)
            def wrapper(*args, **kwargs):
                project = os.getenv("WEDATA_PROJECT_ID")
                obj = func(*args, **kwargs)
                if obj is None:
                    return obj
                if "run" in method_name:
                    obj_name, tags = "run", obj.data.tags
                elif "experiment" in method_name:
                    obj_name, tags = "experiment", obj.tags
                elif "model" in method_name:
                    obj_name, tags = "model", obj.tags
                else:
                    obj_name, tags = "object", {}
                if project and tags.get("wedata.project") != project:
                    print(f"this project:{project},has no {obj_name}")
                    return None
                if tags.get("wedata.datascience.type") not in allowed_types:
                    print(
                        "Only ['MACHINE_LEARNING','AUTOML_REGRESSION','AUTOML_CLASSIFICATION','AUTOML_PREDICTION']"
                        " experiment/run/model can be operated in the notebook"
                    )
                    return None
                return obj

            return wrapper

        def validate_wedata_before_operation(func):
            """Run the operation only on objects owned by this project."""
            signature = inspect.signature(func)
            method_name = func.__name__

            @wraps(func)
            def wrapper(client, *args, **kwargs):
                project = os.getenv("WEDATA_PROJECT_ID")
                # Without a project there is nothing to validate against.
                if not project:
                    return func(client, *args, **kwargs)

                first_arg = args[0] if args else None
                if "experiment" in method_name:
                    target_id = kwargs.get("experiment_id") or first_arg
                    target = client.get_experiment(target_id)
                    if not target:
                        print(
                            f"Experiment: '{target_id}' not exist or does not have permission to operate"
                        )
                        return None
                    tags = target.tags
                elif "model" in method_name:
                    target_id = kwargs.get("name") or first_arg
                    target = client.get_registered_model(target_id)
                    if not target:
                        print(
                            f"Model '{target_id}' not exist or does not have permission to operate"
                        )
                        return None
                    tags = target.tags
                else:
                    target_id = kwargs.get("run_id") or first_arg
                    target = client.get_run(target_id)
                    if not target:
                        print(
                            f"run: '{target_id}' not exist or does not have permission to operate"
                        )
                        return None
                    tags = target.data.tags

                if (
                    tags.get("wedata.project") != project
                    or tags.get("wedata.datascience.type") not in allowed_types
                ):
                    print(f"Unauthorized operation:{method_name} ({target_id})")
                    return None

                # Tag operations must not modify the built-in project tag.
                if method_name in protected_tag_methods:
                    key_value = bound_value(signature, "key", (client, *args), kwargs)
                    if key_value == "wedata.project":
                        print(f"No permission to operate protected tags: {key_value}")
                        return None
                return func(client, *args, **kwargs)

            return wrapper

        Model.log = inject_model_version_tag(Model.log)

        # 1. Add the project condition to searches, the project tags to creates.
        patch(
            MlflowClient,
            inject_project_filter,
            (
                "search_experiments",
                "search_runs",
                "search_registered_models",
                "search_model_versions",
            ),
        )
        patch(
            MlflowClient,
            inject_project_tag,
            ("create_experiment", "create_registered_model", "create_model_version"),
        )
        # 2. Filter fetched objects by their project tag.
        patch(
            MlflowClient,
            validate_wedata_tag,
            (
                "get_experiment",
                "get_experiment_by_name",
                "get_run",
                "get_parent_run",
                "get_registered_model",
            ),
        )
        TrackingServiceClient.set_terminated = log_after_terminated(
            TrackingServiceClient.set_terminated
        )
        # 3. Validate ownership before experiment, run and model operations.
        # TODO: the tag setters still need a check that the key being set is
        # not the project tag for every method (see protected_tag_methods).
        patch(
            MlflowClient,
            validate_wedata_before_operation,
            (
                "delete_experiment",
                "restore_experiment",
                "rename_experiment",
                "set_experiment_tag",
                "set_tag",
                "delete_tag",
                "update_run",
                "download_artifacts",
                "list_artifacts",
                "delete_run",
                "restore_run",
                "rename_registered_model",
                "update_registered_model",
                "delete_registered_model",
                "update_model_version",
                "delete_model_version",
                "set_model_version_tag",
                "delete_model_version_tag",
                "set_registered_model_alias",
                "delete_registered_model_alias",
                "set_registered_model_tag",
                "delete_registered_model_tag",
            ),
        )
File without changes
@@ -0,0 +1,425 @@
1
+ from typing import Optional
2
+
3
+ from wedata_pre_code.common.base_client import BaseClient
4
+
5
+ __doc__ = """
6
+ Wedata3预执行代码客户端
7
+ """
8
+
9
+ PROXY_HEADER_KEY_IP = "X-Target-Service-IP"
10
+ PROXY_HEADER_KEY_PORT = "X-Target-Service-PORT"
11
+
12
+ FEAST_PROXY_ENV_KEY_IP = "FEAST_SERVICE_IP"
13
+ FEAST_PROXY_ENV_KEY_PORT = "FEAST_SERVICE_PORT"
14
+
15
+ MLFLOW_PROXY_ENV_KEY_IP = "MLFLOW_SERVICE_IP"
16
+ MLFLOW_PROXY_ENV_KEY_PORT = "MLFLOW_SERVICE_PORT"
17
+
18
+ KERNEL_WEDATA_PREFIX = "KERNEL_WEDATA_"
19
+
20
+
21
def get_kernel_env_key(key):
    """Build the WeData-specific environment variable name for ``key``.

    The shared prefix keeps these names from clashing with built-in system
    variables, e.g. ``"REGION"`` becomes ``"KERNEL_WEDATA_REGION"``.

    :param key: Name without the prefix.
    :return: ``key`` with ``KERNEL_WEDATA_PREFIX`` put in front of it.
    """
    prefixed = KERNEL_WEDATA_PREFIX
    prefixed += key
    return prefixed
29
+
30
+
31
class Wedata3PreCodeClient(BaseClient):
    """Pre-execution code client for WeData 3.

    :meth:`init` exports the WeData environment variables, points MLflow at
    the tracking server, monkey-patches MLflow so that objects are tagged
    with and restricted to the current workspace, and wraps the Feast remote
    registry so that its gRPC calls carry the proxy headers.

    Required fields:
        - workspace_id: workspace ID
        - base_url: base URL of the console links
        - mlflow_gateway_url: MLflow serverless gateway address
        - feast_gateway_url: Feast serverless gateway address
        - mlflow_proxy_ip / mlflow_proxy_port: MLflow forwarding target
        - feast_proxy_ip / feast_proxy_port: Feast forwarding target
    Optional fields:
        - region: region
        - ap_region_id: region ID
        - kernel_task_name: notebook path name
        - kernel_task_id: notebook file ID
        - kernel_submit_form_workflow: workflow the task was submitted from
        - cloud_sdk_secret_id / cloud_sdk_secret_key / cloud_sdk_secret_token:
          cloud SDK credentials
        - qcloud_uin / qcloud_subuin: Tencent Cloud account identifiers
    """

    workspace_id: str
    base_url: str
    region: Optional[str] = ""
    ap_region_id: Optional[int] = 0
    # Base serverless gateway addresses
    mlflow_gateway_url: str
    feast_gateway_url: str
    # MLflow forwarding target
    mlflow_proxy_ip: str
    mlflow_proxy_port: str
    # Feast forwarding target
    feast_proxy_ip: str
    feast_proxy_port: str
    # Built-in system variables (documented as optional, so they default to "")
    kernel_task_name: Optional[str] = ""
    kernel_task_id: Optional[str] = ""
    kernel_submit_form_workflow: Optional[str] = ""
    # Cloud SDK credentials
    cloud_sdk_secret_id: Optional[str] = ""
    cloud_sdk_secret_key: Optional[str] = ""
    cloud_sdk_secret_token: Optional[str] = ""
    # Account identifiers
    qcloud_uin: Optional[str] = ""
    qcloud_subuin: Optional[str] = ""

    def init(self):
        """Export the environment and patch MLflow and Feast.

        NOTE(review): the nesting of the three ``if`` blocks below was
        reconstructed from a listing without indentation; confirm their
        extent against the packaged module.
        """
        import inspect
        import json
        import os
        from functools import wraps

        import feast
        import feast.infra.registry.remote
        import grpc
        import mlflow
        from feast.errors import FeastError
        from feast.protos.feast.registry import RegistryServer_pb2_grpc
        from mlflow.models.model import Model
        from mlflow.tracking import MlflowClient
        from mlflow.tracking._tracking_service.client import TrackingServiceClient

        allowed_types = ("MACHINE_LEARNING", "DEEP_LEARNING")
        # Methods whose ``key`` argument must not touch the protected tag.
        # NOTE(review): "update_tag" and "delete_tags" match none of the
        # methods patched below; confirm whether set_tag/delete_tag were meant.
        protected_tag_methods = (
            "update_tag",
            "delete_tags",
            "set_registered_model_tag",
            "delete_registered_model_tag",
            "delete_model_version_tag",
            "set_experiment_tag",
        )

        def export_env(name, value):
            """Store ``value`` in the environment; None is written as ""."""
            os.environ[name] = "" if value is None else str(value)

        mlflow_tracking_uri = f"http://{self.mlflow_proxy_ip}:{self.mlflow_proxy_port}"
        feast_remote_address = f"{self.feast_proxy_ip}:{self.feast_proxy_port}"

        # With a gateway configured, traffic goes to the gateway and the real
        # target travels in the proxy environment variables.
        if self.mlflow_gateway_url:
            mlflow_tracking_uri = f"http://{self.mlflow_gateway_url}"
            export_env(MLFLOW_PROXY_ENV_KEY_IP, self.mlflow_proxy_ip)
            export_env(MLFLOW_PROXY_ENV_KEY_PORT, self.mlflow_proxy_port)

        if self.feast_gateway_url:
            feast_remote_address = self.feast_gateway_url
            export_env(FEAST_PROXY_ENV_KEY_IP, self.feast_proxy_ip)
            export_env(FEAST_PROXY_ENV_KEY_PORT, self.feast_proxy_port)

        export_env("WEDATA_WORKSPACE_ID", self.workspace_id)
        export_env("MLFLOW_TRACKING_URI", mlflow_tracking_uri)
        export_env(get_kernel_env_key("REGION"), self.region)
        export_env("KERNEL_FEAST_REMOTE_ADDRESS", feast_remote_address)

        # Built-in system variables.
        export_env(get_kernel_env_key("TASK_NAME"), self.kernel_task_name)
        export_env(get_kernel_env_key("TASK_ID"), self.kernel_task_id)
        export_env(
            get_kernel_env_key("SUBMIT_FORM_WORKFLOW"),
            self.kernel_submit_form_workflow,
        )
        export_env(get_kernel_env_key("CLOUD_SDK_SECRET_ID"), self.cloud_sdk_secret_id)
        export_env(get_kernel_env_key("CLOUD_SDK_SECRET_KEY"), self.cloud_sdk_secret_key)
        export_env(
            get_kernel_env_key("CLOUD_SDK_SECRET_TOKEN"), self.cloud_sdk_secret_token
        )
        export_env(get_kernel_env_key("QCLOUD_UIN"), self.qcloud_uin)
        export_env(get_kernel_env_key("QCLOUD_SUBUIN"), self.qcloud_subuin)

        mlflow.set_tracking_uri(mlflow_tracking_uri)

        # Keep a run context that is already set.
        if not os.environ.get("MLFLOW_RUN_CONTEXT"):
            os.environ["MLFLOW_RUN_CONTEXT"] = json.dumps(
                {
                    "mlflow.source.name": self.kernel_task_name,
                    "mlflow.user": self.qcloud_uin,
                    "wedata.taskId": self.kernel_task_id,
                    "wedata.workflowId": self.kernel_submit_form_workflow,
                    "wedata.datascience.type": "MACHINE_LEARNING",
                    "wedata.workspace": self.workspace_id,
                },
                indent=None,
            )

        workspace_id = self.workspace_id
        uin = self.qcloud_uin or ""

        def bound_value(signature, name, args, kwargs):
            """Return the value the caller passed for ``name``, else None."""
            try:
                return signature.bind_partial(*args, **kwargs).arguments.get(name)
            except TypeError:
                return None

        def patch(owner, decorator, method_names):
            """Replace each named attribute of ``owner`` by its wrapped form."""
            for method_name in method_names:
                setattr(owner, method_name, decorator(getattr(owner, method_name)))

        if self.region:
            # Console links are printed only when a region is configured.
            base_url = self.base_url
            ap_region_id = self.ap_region_id

            def log_after_terminated(func):
                """Print the console links of a run after it is terminated."""

                @wraps(func)
                def wrapper(tracking_client, run_id, *args, **kwargs):
                    print("wedata log_after_terminated wrapper")
                    result = func(tracking_client, run_id, *args, **kwargs)
                    run_info = tracking_client.store.get_run(run_id).info
                    query = f"o={workspace_id}&r={ap_region_id}"
                    experiment_url = (
                        f"{base_url}/datascience/experiments/experiments-single/"
                        f"{run_info.experiment_id}?{query}"
                    )
                    run_url = (
                        f"{base_url}/datascience/experiments/task-detail-learn/"
                        f"{run_id}?{query}"
                    )
                    print(f"View run {run_info.run_name} at :{run_url}")
                    print(f"View experiment at:{experiment_url}")
                    return result

                return wrapper

            TrackingServiceClient.set_terminated = log_after_terminated(
                TrackingServiceClient.set_terminated
            )

        def inject_model_version_tag(func):
            """Tag the model version registered by a ``Model.log`` call."""
            signature = inspect.signature(func)

            @wraps(func)
            def wrapper(*args, **kwargs):
                print("wedata inject_model_version_tag wrapper")
                registered_model_name = kwargs.get("registered_model_name")
                if registered_model_name is None:
                    # Binding against the real signature finds the argument
                    # whether or not the callable is already bound.
                    registered_model_name = bound_value(
                        signature, "registered_model_name", args, kwargs
                    )
                result = func(*args, **kwargs)
                model_version = getattr(result, "registered_model_version", None)
                if registered_model_name and model_version:
                    client = MlflowClient()
                    for tag_key, tag_value in (
                        ("mlflow.user", uin),
                        ("wedata.workspace", workspace_id),
                        ("wedata.datascience.type", "MACHINE_LEARNING"),
                    ):
                        client.set_model_version_tag(
                            registered_model_name, model_version, tag_key, tag_value
                        )
                return result

            return wrapper

        def inject_workspace_tag(func):
            """Add the WeData tags to the ``tags`` of a create_* call."""
            signature = inspect.signature(func)

            @wraps(func)
            def wrapper(client, *args, **kwargs):
                workspace = os.getenv("WEDATA_WORKSPACE_ID")
                if not workspace:
                    return func(client, *args, **kwargs)
                try:
                    bound = signature.bind(client, *args, **kwargs)
                except TypeError:
                    # Invalid call: let the real method report the error.
                    return func(client, *args, **kwargs)
                tags = dict(bound.arguments.get("tags") or {})
                if "tags" in kwargs:
                    # Keyword-passed tags keep the caller's own values for
                    # the two WeData keys when they are already present.
                    tags.setdefault("wedata.workspace", workspace)
                    tags.setdefault("wedata.datascience.type", "MACHINE_LEARNING")
                else:
                    tags["wedata.workspace"] = workspace
                    tags["wedata.datascience.type"] = "MACHINE_LEARNING"
                    tags["mlflow.user"] = uin
                # Writing through the bound arguments keeps positionally
                # passed tags from being supplied twice.
                bound.arguments["tags"] = tags
                return func(*bound.args, **bound.kwargs)

            return wrapper

        def validate_wedata_tag(func):
            """Hide fetched objects that do not belong to this workspace."""
            method_name = func.__name__

            @wraps(func)
            def wrapper(*args, **kwargs):
                workspace = os.getenv("WEDATA_WORKSPACE_ID")
                obj = func(*args, **kwargs)
                if obj is None:
                    return obj
                if "run" in method_name:
                    obj_name, tags = "run", obj.data.tags
                elif "experiment" in method_name:
                    obj_name, tags = "experiment", obj.tags
                elif "model" in method_name:
                    obj_name, tags = "model", obj.tags
                else:
                    obj_name, tags = "object", {}
                if workspace and tags.get("wedata.workspace") != workspace:
                    print(f"this workspace:{workspace},has no {obj_name}")
                    return None
                if tags.get("wedata.datascience.type") not in allowed_types:
                    print(
                        "Only MACHINE_LEARNING and DEEP_LEARNING experiment/run/model can be operated in the notebook"
                    )
                    return None
                return obj

            return wrapper

        def validate_wedata_before_operation(func):
            """Run the operation only on objects owned by this workspace."""
            signature = inspect.signature(func)
            method_name = func.__name__

            @wraps(func)
            def wrapper(client, *args, **kwargs):
                workspace = os.getenv("WEDATA_WORKSPACE_ID")
                # Without a workspace there is nothing to validate against.
                if not workspace:
                    return func(client, *args, **kwargs)

                first_arg = args[0] if args else None
                if "experiment" in method_name:
                    target_id = kwargs.get("experiment_id") or first_arg
                    target = client.get_experiment(target_id)
                    if not target:
                        print(
                            f"Experiment: '{target_id}' not exist or does not have permission to operate"
                        )
                        return None
                    tags = target.tags
                elif "model" in method_name:
                    target_id = kwargs.get("name") or first_arg
                    target = client.get_registered_model(target_id)
                    if not target:
                        print(
                            f"Model '{target_id}' not exist or does not have permission to operate"
                        )
                        return None
                    tags = target.tags
                else:
                    target_id = kwargs.get("run_id") or first_arg
                    target = client.get_run(target_id)
                    if not target:
                        print(
                            f"run: '{target_id}' not exist or does not have permission to operate"
                        )
                        return None
                    tags = target.data.tags

                if (
                    tags.get("wedata.workspace") != workspace
                    or tags.get("wedata.datascience.type") not in allowed_types
                ):
                    print(f"Unauthorized operation:{method_name} ({target_id})")
                    return None

                # Tag operations must not modify the built-in workspace tag.
                if method_name in protected_tag_methods:
                    key_value = bound_value(signature, "key", (client, *args), kwargs)
                    if key_value == "wedata.workspace":
                        print(f"No permission to operate protected tags: {key_value}")
                        return None
                return func(client, *args, **kwargs)

            return wrapper

        Model.log = inject_model_version_tag(Model.log)

        patch(
            MlflowClient,
            inject_workspace_tag,
            ("create_experiment", "create_registered_model", "create_model_version"),
        )
        patch(
            MlflowClient,
            validate_wedata_tag,
            (
                "get_experiment",
                "get_experiment_by_name",
                "get_run",
                "get_parent_run",
                "get_registered_model",
            ),
        )
        patch(
            MlflowClient,
            validate_wedata_before_operation,
            (
                "delete_experiment",
                "restore_experiment",
                "rename_experiment",
                "set_experiment_tag",
                "set_tag",
                "delete_tag",
                "update_run",
                "download_artifacts",
                "list_artifacts",
                "delete_run",
                "restore_run",
                "rename_registered_model",
                "update_registered_model",
                "delete_registered_model",
                "update_model_version",
                "delete_model_version",
                "set_model_version_tag",
                "delete_model_version_tag",
                "set_registered_model_alias",
                "delete_registered_model_alias",
                "set_registered_model_tag",
                "delete_registered_model_tag",
            ),
        )

        _original_remote_registry = feast.infra.registry.remote.RemoteRegistry

        class GrpcClientProxyHeaderInterceptor(
            grpc.UnaryUnaryClientInterceptor,
            grpc.UnaryStreamClientInterceptor,
            grpc.StreamUnaryClientInterceptor,
            grpc.StreamStreamClientInterceptor,
        ):
            """gRPC client interceptor that adds the proxy target headers."""

            def __init__(self):
                """Read the proxy target from the environment.

                :raises FeastError: If the IP or port variable is not set.
                """
                self.proxy_ip = os.environ.get(FEAST_PROXY_ENV_KEY_IP)
                self.proxy_port = os.environ.get(FEAST_PROXY_ENV_KEY_PORT)
                if not self.proxy_ip:
                    raise FeastError(
                        f"Environment variable `{FEAST_PROXY_ENV_KEY_IP}` is not set"
                    )
                if not self.proxy_port:
                    raise FeastError(
                        f"Environment variable `{FEAST_PROXY_ENV_KEY_PORT}` is not set"
                    )

            def intercept_unary_unary(self, continuation, client_call_details, request_iterator):
                return self._handle_call(continuation, client_call_details, request_iterator)

            def intercept_unary_stream(self, continuation, client_call_details, request_iterator):
                return self._handle_call(continuation, client_call_details, request_iterator)

            def intercept_stream_unary(self, continuation, client_call_details, request_iterator):
                return self._handle_call(continuation, client_call_details, request_iterator)

            def intercept_stream_stream(self, continuation, client_call_details, request_iterator):
                return self._handle_call(continuation, client_call_details, request_iterator)

            def _handle_call(self, continuation, client_call_details, request_iterator):
                """Send the call with the proxy headers and map Feast errors."""
                client_call_details = self._append_proxy_header_metadata(
                    client_call_details
                )
                result = continuation(client_call_details, request_iterator)
                if result.exception() is not None:
                    mapped_error = FeastError.from_error_detail(
                        result.exception().details()
                    )
                    if mapped_error is not None:
                        raise mapped_error
                return result

            def _append_proxy_header_metadata(self, client_call_details):
                """Return call details whose metadata has the proxy headers."""
                # Copy first: the incoming metadata may be a tuple, and the
                # caller's sequence must not be modified in place.
                metadata = list(client_call_details.metadata or ())
                metadata.append((PROXY_HEADER_KEY_IP.lower(), self.proxy_ip))
                metadata.append((PROXY_HEADER_KEY_PORT.lower(), self.proxy_port))
                return client_call_details._replace(metadata=metadata)

        def add_feast_proxy_header():
            """Return a ``RemoteRegistry`` factory that adds the interceptor."""

            def func(*args, **kwargs):
                registry = _original_remote_registry(*args, **kwargs)
                registry.channel = grpc.intercept_channel(
                    registry.channel, GrpcClientProxyHeaderInterceptor()
                )
                # The stub must be rebuilt on top of the intercepted channel.
                registry.stub = RegistryServer_pb2_grpc.RegistryServerStub(
                    registry.channel
                )
                return registry

            return func

        feast.infra.registry.remote.RemoteRegistry = add_feast_proxy_header()
@@ -0,0 +1,232 @@
1
+ Metadata-Version: 2.3
2
+ Name: wedata-pre-code
3
+ Version: 1.0.8
4
+ Summary: WeData平台的预执行代码库,为机器学习实验提供与MLflow的深度集成
5
+ Author: WeData Team
6
+ Author-email: WeData Team <wedata@tencent.com>
7
+ License: MIT
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.8
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
+ Requires-Dist: feast==0.49.0
18
+ Requires-Dist: grpcio>=1.71.0
19
+ Requires-Dist: mlflow>=2.0.0
20
+ Requires-Dist: pydantic>=2.10.6
21
+ Requires-Dist: mlflow>=2.0.0,<3.0.0 ; extra == 'mlflow-v2'
22
+ Requires-Dist: wedata-mlflow-header-plugin>=0.1.2 ; extra == 'wedata-3'
23
+ Requires-Python: >=3.9
24
+ Project-URL: Documentation, https://wedata.tencent.com/docs
25
+ Project-URL: Homepage, https://wedata.tencent.com
26
+ Provides-Extra: mlflow-v2
27
+ Provides-Extra: wedata-3
28
+ Description-Content-Type: text/markdown
29
+
30
+ # WeData Pre-Code Library
31
+
32
+ WeData平台的预执行代码库,为机器学习实验提供与MLflow的深度集成和WeData平台的功能增强。
33
+
34
+ ## 项目概述
35
+
36
+ 本项目提供了两个版本的WeData客户端,用于在WeData平台上运行机器学习实验时提供以下功能:
37
+
38
+ - **MLflow集成增强**:自动注入WeData平台特定的标签和过滤条件
39
+ - **权限控制**:基于项目/工作空间的权限验证机制
40
+ - **URL生成**:自动生成实验和运行的查看链接
41
+ - **环境配置**:自动设置运行环境变量
42
+
43
+ ## 版本说明
44
+
45
+ ### Wedata2PreCodeClient (WeData 2.0版本)
46
+
47
+ 适用于WeData 2.0平台的客户端,主要特性:
48
+
49
+ - 基于项目ID进行权限控制
50
+ - 支持国内站和国际站URL模板
51
+ - 自动注入项目标签和机器学习类型标签
52
+ - 提供完整的MLflow客户端装饰器
53
+
54
+ ### Wedata3PreCodeClient (WeData 3.0版本)
55
+
56
+ 适用于WeData 3.0平台的客户端,主要特性:
57
+
58
+ - 基于工作空间ID进行权限控制
59
+ - 支持更灵活的配置选项
60
+ - 增强的标签注入和验证机制
61
+ - 支持机器学习和深度学习两种实验类型
62
+
63
+ ## 安装和使用
64
+
65
+ ### 安装依赖
66
+
67
+ ```bash
68
+ pip install wedata-pre-code
69
+ ```
70
+
71
+ ### 使用Wedata2PreCodeClient
72
+
73
+ ```python
74
+ from wedata_pre_code.wedata2.client import Wedata2PreCodeClient
75
+
76
+ # 初始化客户端
77
+ client = Wedata2PreCodeClient(
78
+ wedata_project_id="{{WEDATA_PROJECT_ID}}",
79
+ wedata_notebook_engine="{{WEDATA_NOTEBOOK_ENGINE}}",
80
+ qcloud_uin="{{QCLOUD_UIN}}",
81
+ qcloud_subuin="{{QCLOUD_SUBUIN}}",
82
+ wedata_default_feature_store_database="{{WEDATA_DEFAULT_FEATURE_STORE_DATABASE}}",
83
+ wedata_feature_store_databases="{{WEDATA_FEATURE_STORE_DATABASES}}",
84
+ qcloud_region="{{QCLOUD_REGION}}",
85
+ mlflow_tracking_uri="{{KERNEL_MLFLOW_TRACKING_URI}}",
86
+ feast_remote_address="{{KERNEL_FEAST_REMOTE_ADDRESS}}",
87
+ kernel_submit_form_workflow="{{KERNEL_SUBMIT_FORM_WORKFLOW}}",
88
+ kernel_task_name="{{KERNEL_TASK_NAME}}",
89
+ kernel_task_id="{{KERNEL_TASK_ID}}",
90
+ kernel_region="ap-chongqing",
91
+ kernel_is_international="{{KERNEL_IS_INTERNATIONAL}}".strip().lower() in ("true", "1")  # 注意:bool("False") 恒为 True,需显式解析字符串
92
+ )
93
+
94
+ # 执行初始化(与 PreCodeClient.init_wedata2_pre_code 的做法一致)
+ client.init()
+
+ # 现在可以使用MLflow客户端,会自动应用WeData的增强功能
95
+ import mlflow
96
+ mlflow.start_run()
97
+ # ... 你的实验代码
98
+ ```
99
+
100
+ ### 使用Wedata3PreCodeClient
101
+
102
+ ```python
103
+ %pip install wedata-pre-code
104
+ from wedata_pre_code.wedata3.client import Wedata3PreCodeClient
105
+
106
+ # 初始化客户端
107
+ client = Wedata3PreCodeClient(
108
+ workspace_id="{{WorkspaceID}}",
109
+ base_url="{{BaseUrl}}",
110
+ region="{{Region}}",
111
+ ap_region_id=int("{{RegionId}}"),
112
+ mlflow_gateway_url="{{MlflowGatewayUrl}}",
113
+ feast_gateway_url="{{FeastGatewayUrl}}",
114
+ mlflow_proxy_ip="{{MlflowProxyIp}}",
115
+ mlflow_proxy_port="{{MlflowProxyPort}}",
116
+ feast_proxy_ip="{{FeastProxyIp}}",
117
+ feast_proxy_port="{{FeastProxyPort}}",
118
+ kernel_task_name="{{TaskName}}",
119
+ kernel_task_id="{{TaskId}}",
120
+ kernel_submit_form_workflow="{{SubmitFormWorkflow}}",
121
+ cloud_sdk_secret_id="{{CloudSdkSecretId}}",
122
+ cloud_sdk_secret_key="{{CloudSdkSecretKey}}",
123
+ cloud_sdk_secret_token="{{CloudSdkSecretToken}}",
124
+ qcloud_uin="{{QcloudUin}}",
125
+ qcloud_subuin="{{QcloudSubUin}}",
126
+ )
127
+
128
+ client.init()
129
+ ```
130
+
131
+ * 必传参数:
132
+ - workspace_id: 工作空间ID
133
+ - mlflow_tracking_uri: mlflow跟踪URI
134
+ - base_url: 基础URL
135
+ - mlflow_gateway_url: mlflow基础serverless网关地址
136
+ - feast_gateway_url: feast基础serverless网关地址
137
+ - mlflow_proxy_ip: mlflow转发地址
138
+ - mlflow_proxy_port: mlflow转发端口
139
+ - feast_proxy_ip: feast转发地址
140
+ - feast_proxy_port: feast转发端口
141
+ * 可选参数:
142
+ - region: 区域
143
+ - ap_region_id: 区域ID
144
+ - kernel_task_name: Notebook路径名
145
+ - kernel_task_id: Notebook文件ID
146
+ - kernel_submit_form_workflow: 任务提交表单工作流
147
+ - kernel_is_international: 是否国际站
148
+ - cloud_sdk_secret_id: 云SDK密钥ID
149
+ - cloud_sdk_secret_key: 云SDK密钥KEY
150
+ - cloud_sdk_secret_token: 云SDK密钥TOKEN
151
+ - qcloud_uin: 腾讯云uin
152
+ - qcloud_subuin: 腾讯云subuin
153
+
154
+ ## 功能特性
155
+
156
+ ### 自动标签注入
157
+
158
+ - 自动为实验、运行和模型注入WeData平台标签
159
+ - 包括项目ID、工作空间ID、机器学习类型等信息
160
+ - 确保数据在平台上的可追溯性
161
+
162
+ ### 权限验证
163
+
164
+ - 在执行敏感操作前验证权限
165
+ - 防止跨项目/工作空间的未授权操作
166
+ - 保护内置标签不被修改
167
+
168
+ ### URL生成
169
+
170
+ - 自动生成实验和运行的查看URL
171
+ - 在运行终止时显示访问链接
172
+ - 方便用户快速访问实验结果
173
+
174
+ ### 环境配置
175
+
176
+ - 自动设置MLflow跟踪URI
177
+ - 配置运行上下文环境变量
178
+ - 支持国际站和国内站的不同配置
179
+
180
+ ## 项目结构
181
+
182
+ ```
183
+ pre-execute/
184
+ ├── src/
185
+ │ └── wedata_pre_code/
186
+ │ ├── __init__.py
187
+ │ ├── client.py # 主客户端入口
188
+ │ ├── common/
189
+ │ │ ├── __init__.py
190
+ │ │ └── base_client.py # 基础客户端类
191
+ │ ├── wedata2/
192
+ │ │ ├── __init__.py
193
+ │ │ └── client.py # WeData 2.0客户端
194
+ │ └── wedata3/
195
+ │ ├── __init__.py
196
+ │ └── client.py # WeData 3.0客户端
197
+ ├── docs/ # 文档目录
198
+ ├── pyproject.toml # 项目配置
199
+ ├── requirement.txt # 依赖文件
200
+ └── README.md # 项目说明
201
+ ```
202
+
203
+ ## 开发指南
204
+
205
+ ### 添加新的装饰器
206
+
207
+ 要添加新的MLflow客户端方法装饰器,可以参考现有的实现模式:
208
+
209
+ 1. 在相应的客户端类中定义装饰器函数
210
+ 2. 使用`@wraps`保留原函数属性
211
+ 3. 在装饰器内部实现特定的逻辑
212
+ 4. 将装饰器应用到目标MLflow方法
213
+
214
+ ### 测试
215
+
216
+ 确保在修改代码后测试以下场景:
217
+
218
+ - 正常创建实验和运行
219
+ - 权限验证功能
220
+ - 标签注入的正确性
221
+ - URL生成的准确性
222
+
223
+ ## 注意事项
224
+
225
+ - 确保MLflow服务器配置正确
226
+ - 验证环境变量设置完整
227
+ - 注意不同版本客户端的参数差异
228
+ - 在生产环境使用前进行充分测试
229
+
230
+ ## 支持与反馈
231
+
232
+ 如有问题或建议,请联系WeData平台技术支持团队。
@@ -0,0 +1,11 @@
1
+ wedata_pre_code/__init__.py,sha256=-p_dX-VDC0sAqqAwuWLLlnzRNAHad1LF7wOFJw2z_uk,240
2
+ wedata_pre_code/client.py,sha256=IrcbswwI206_Qxxc9IzqXYKkGrWB2IDbIyBzZa1ex0E,1111
3
+ wedata_pre_code/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ wedata_pre_code/common/base_client.py,sha256=Y9lQBOYaDOB7Zy3MR_bPvHSQjN5Onsn8TPOzcTmGMEc,224
5
+ wedata_pre_code/wedata2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ wedata_pre_code/wedata2/client.py,sha256=ScAQRrT-alc2CqatiwLCNAymYfEtWKWTnrCsLTbYEoM,19535
7
+ wedata_pre_code/wedata3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ wedata_pre_code/wedata3/client.py,sha256=CKuuHHU9JZXQeLORtPU3xXSAXzynBFN6jec8UBvrC_I,21228
9
+ wedata_pre_code-1.0.8.dist-info/WHEEL,sha256=eycQt0QpYmJMLKpE3X9iDk8R04v2ZF0x82ogq-zP6bQ,79
10
+ wedata_pre_code-1.0.8.dist-info/METADATA,sha256=3WpiDxxbZigT7G00pLE8nzOJWflPKCXmIMiLYGRGmTQ,7371
11
+ wedata_pre_code-1.0.8.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.9.24
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any