wedata-pre-code 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wedata_pre_code/__init__.py +10 -0
- wedata_pre_code/client.py +37 -0
- wedata_pre_code/common/__init__.py +0 -0
- wedata_pre_code/common/base_client.py +16 -0
- wedata_pre_code/wedata2/__init__.py +0 -0
- wedata_pre_code/wedata2/client.py +433 -0
- wedata_pre_code/wedata3/__init__.py +0 -0
- wedata_pre_code/wedata3/client.py +425 -0
- wedata_pre_code-1.0.8.dist-info/METADATA +232 -0
- wedata_pre_code-1.0.8.dist-info/RECORD +11 -0
- wedata_pre_code-1.0.8.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
__doc__ = """
|
|
2
|
+
Wedata 预执行代码客户端
|
|
3
|
+
如果要初始化Wedata2的预执行代码客户端,请使用init_wedata2_pre_code方法
|
|
4
|
+
如果要初始化Wedata3的预执行代码客户端,请使用init_wedata3_pre_code方法
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class PreCodeClient:
    """Factory for the Wedata pre-execution code clients.

    Each factory method imports the concrete client lazily, so the heavy
    dependencies of one Wedata generation are only loaded when that
    generation is actually requested.
    """

    def init_wedata2_pre_code(self, **kwargs):
        """Create and initialise the Wedata2 pre-execution code client.

        :param kwargs: constructor arguments of ``Wedata2PreCodeClient``
        :return: the initialised ``Wedata2PreCodeClient`` instance
        """
        from wedata_pre_code.wedata2.client import Wedata2PreCodeClient

        # Wedata2PreCodeClient.__init__ already calls init(). Calling it a
        # second time here re-wrapped the MLflow methods that had just been
        # wrapped, so every patched call ran its hook twice (duplicated log
        # output, project filter appended twice).
        return Wedata2PreCodeClient(**kwargs)

    def init_wedata3_pre_code(self, **kwargs):
        """Create and initialise the Wedata3 pre-execution code client.

        :param kwargs: constructor arguments of ``Wedata3PreCodeClient``
        :return: the initialised ``Wedata3PreCodeClient`` instance
        """
        from wedata_pre_code.wedata3.client import Wedata3PreCodeClient

        client = Wedata3PreCodeClient(**kwargs)
        # Wedata3PreCodeClient defines no __init__ of its own that runs
        # init(), so it is triggered explicitly here.
        client.init()
        return client
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
from wedata_pre_code.common.base_client import BaseClient
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Wedata2PreCodeClient(BaseClient):
    """Pre-execution code client for Wedata2 notebooks.

    Initialising the client exports the Wedata / MLflow configuration as
    environment variables and monkey-patches ``MlflowClient``,
    ``TrackingServiceClient.set_terminated`` and ``Model.log`` so that every
    MLflow object created from the notebook is tagged with the current
    project, and objects of other projects are hidden or protected.

    Required parameters:
        - wedata_project_id: project ID
        - wedata_notebook_engine: notebook engine
        - qcloud_uin: Tencent Cloud uin
        - qcloud_subuin: Tencent Cloud sub-uin
        - wedata_default_feature_store_database: default feature store database
        - wedata_feature_store_databases: feature store databases
        - qcloud_region: region
        - mlflow_tracking_uri: MLflow tracking URI
        - kernel_region: region used to build the Wedata console URL
        - feast_remote_address: feast remote address (listed by the original
          documentation, but not declared or read by this class)
    Optional parameters:
        - kernel_task_name: task name
        - kernel_task_id: task ID
        - kernel_submit_form_workflow: workflow the task was submitted from
        - kernel_is_international: whether the international site is used
    """

    wedata_project_id: str
    wedata_notebook_engine: str
    qcloud_uin: str
    qcloud_subuin: str
    wedata_default_feature_store_database: str
    wedata_feature_store_databases: str
    qcloud_region: str
    mlflow_tracking_uri: str
    kernel_region: str
    kernel_task_name: Optional[str] = ""
    kernel_task_id: Optional[str] = ""
    kernel_submit_form_workflow: Optional[str] = ""
    kernel_is_international: Optional[bool] = False

    def __init__(self, *args, **kwargs):
        """Populate the client through ``BaseClient`` and apply the patches."""
        super(Wedata2PreCodeClient, self).__init__(*args, **kwargs)
        self.init()

    def init(self):
        """Export the environment variables and install the MLflow patches.

        The method is safe to call more than once: a previously installed
        Wedata2 wrapper is removed before the new one is applied, so hooks
        never stack (the original implementation wrapped the wrappers again
        on every call).
        """
        import inspect
        import json
        import os
        from functools import wraps

        from mlflow.models.model import Model
        from mlflow.tracking import MlflowClient
        from mlflow.tracking._tracking_service.client import TrackingServiceClient

        user_name = self.qcloud_uin
        task_name = self.kernel_task_name
        task_id = self.kernel_task_id
        project_id = self.wedata_project_id

        os.environ["WEDATA_PROJECT_ID"] = project_id
        os.environ["WEDATA_NOTEBOOK_ENGINE"] = self.wedata_notebook_engine
        os.environ["QCLOUD_UIN"] = self.qcloud_uin
        os.environ["QCLOUD_SUBUIN"] = self.qcloud_subuin
        os.environ["WEDATA_DEFAULT_FEATURE_STORE_DATABASE"] = (
            self.wedata_default_feature_store_database
        )
        os.environ["WEDATA_FEATURE_STORE_DATABASES"] = (
            self.wedata_feature_store_databases
        )
        os.environ["QCLOUD_REGION"] = self.qcloud_region
        os.environ["MLFLOW_TRACKING_URI"] = self.mlflow_tracking_uri
        # The field is Optional; os.environ only accepts str, so None is
        # exported as the empty string instead of raising TypeError.
        os.environ["KERNEL_SUBMIT_FORM_WORKFLOW"] = (
            self.kernel_submit_form_workflow or ""
        )

        # International site vs. mainland site console host.
        template = (
            "https://{region}.wedata.tencentcloud.com"
            if self.kernel_is_international
            else "https://{region}.wedata.cloud.tencent.com"
        )
        base_url = template.format(region=self.kernel_region)

        run_context_data = {
            "mlflow.source.name": task_name,
            "mlflow.user": user_name,
            "wedata.taskId": task_id,
            "wedata.workflowId": self.kernel_submit_form_workflow,
            "wedata.datascience.type": "MACHINE_LEARNING",
            "wedata.project": project_id,
        }
        os.environ["MLFLOW_RUN_CONTEXT"] = json.dumps(run_context_data, indent=None)

        # Data-science types that may be read or modified from the notebook.
        allowed_types = (
            "MACHINE_LEARNING",
            "AUTOML_REGRESSION",
            "AUTOML_CLASSIFICATION",
            "AUTOML_PREDICTION",
        )
        patch_marker = "_wedata2_patch"

        def bind_call(func, args, kwargs):
            """Map a call onto ``func``'s parameter names; None if it does not fit."""
            try:
                return inspect.signature(func).bind_partial(*args, **kwargs)
            except (TypeError, ValueError):
                return None

        def log_after_terminated(func):
            """Print the console links of a run once it has been terminated."""

            @wraps(func)
            def wrapper(self, run_id, *args, **kwargs):
                print("wedata log_after_terminated wrapper")
                # Call the original set_terminated first.
                result = func(self, run_id, *args, **kwargs)
                run_info = self.store.get_run(run_id).info
                run_name = run_info.run_name
                experiment_id = run_info.experiment_id
                experment_url = f"{base_url}/datascience/experiments-single/{experiment_id}?ProjectId={project_id}"
                run_url = f"{base_url}/datascience/experiments/task-detail-learn/{run_id}?ProjectId={project_id}"
                print(f"View run {run_name} at :{run_url}")
                print(f"View experiment at:{experment_url}")
                return result

            return wrapper

        def inject_model_version_tag(func):
            """Tag the model version registered by ``Model.log``."""

            @wraps(func)
            def wrapper(*args, **kwargs):
                print("wedata inject_model_version_tag wrapper")
                registered_model_name = kwargs.get("registered_model_name")
                if registered_model_name is None:
                    # Resolve a positional value through the real signature.
                    # The original subtracted 1 "for self", but the wrapped
                    # Model.log is a bound classmethod whose signature
                    # already omits ``cls``, so it read the wrong argument.
                    bound = bind_call(func, args, kwargs)
                    if bound is not None:
                        registered_model_name = bound.arguments.get(
                            "registered_model_name"
                        )
                result = func(*args, **kwargs)
                model_version = result.registered_model_version
                if registered_model_name and model_version:
                    client = MlflowClient()
                    # The original wrote the literal placeholders
                    # "{user_name}" / "{project_id}" here; the real values
                    # are used instead.
                    for key, value in (
                        ("mlflow.user", user_name),
                        ("wedata.project", project_id),
                        ("wedata.datascience.type", "MACHINE_LEARNING"),
                    ):
                        client.set_model_version_tag(
                            registered_model_name, model_version, key, value
                        )
                return result

            return wrapper

        def inject_project_filter(func):
            """Restrict a search call to objects tagged with the current project."""

            @wraps(func)
            def wrapper(*args, **kwargs):
                project = os.getenv("WEDATA_PROJECT_ID")
                if not project:
                    return func(*args, **kwargs)
                project_filter = f"tags.wedata.project = '{project}'"

                def combine(existing):
                    if existing:
                        return f"({existing}) and ({project_filter})"
                    return project_filter

                bound = bind_call(func, args, kwargs)
                if bound is None or "filter_string" not in bound.signature.parameters:
                    kwargs["filter_string"] = combine(kwargs.get("filter_string", ""))
                    return func(*args, **kwargs)
                # Rewrites the argument in place, so a positional
                # filter_string no longer collides with an injected keyword.
                bound.arguments["filter_string"] = combine(
                    bound.arguments.get("filter_string")
                )
                return func(*bound.args, **bound.kwargs)

            return wrapper

        def inject_project_tag(func):
            """Add the Wedata tags to the ``tags`` argument of a create call."""

            @wraps(func)
            def wrapper(self, *args, **kwargs):
                project = os.getenv("WEDATA_PROJECT_ID")
                if not project:
                    return func(self, *args, **kwargs)

                if "tags" in kwargs:
                    tags = dict(kwargs["tags"] or {})  # never mutate the caller's dict
                    tags["wedata.project"] = project
                    tags["wedata.datascience.type"] = "MACHINE_LEARNING"
                    tags["wedata.workflowId"] = os.getenv("KERNEL_SUBMIT_FORM_WORKFLOW")
                    kwargs["tags"] = tags
                    return func(self, *args, **kwargs)

                # Tags passed positionally, or not at all. The original
                # never read positional tags (dead ``== None`` branch) and
                # then passed ``tags=`` on top of them, which raised
                # TypeError. The position is now taken from the signature.
                # NOTE(review): this path sets mlflow.user but not
                # wedata.workflowId, the keyword path above does the
                # opposite; the asymmetry is kept from the original.
                bound = bind_call(func, (self,) + args, kwargs)
                current = bound.arguments.get("tags") if bound is not None else None
                merged = dict(current) if current else {}
                merged["wedata.project"] = project
                merged["wedata.datascience.type"] = "MACHINE_LEARNING"
                merged["mlflow.user"] = user_name
                if bound is None or "tags" not in bound.signature.parameters:
                    kwargs["tags"] = merged
                    return func(self, *args, **kwargs)
                bound.arguments["tags"] = merged
                return func(*bound.args, **bound.kwargs)

            return wrapper

        def validate_wedata_tag(func):
            """Hide fetched objects that do not belong to the current project."""

            @wraps(func)
            def wrapper(*args, **kwargs):
                project = os.getenv("WEDATA_PROJECT_ID")
                obj = func(*args, **kwargs)
                if obj is None:
                    return obj

                method_name = func.__name__
                project_tag = None
                datascience_type_tag = None
                obj_name = "object"
                if "run" in method_name:
                    obj_name = "run"
                    tags = obj.data.tags
                elif "experiment" in method_name:
                    obj_name = "experiment"
                    tags = obj.tags
                elif "model" in method_name:
                    obj_name = "model"
                    tags = obj.tags
                else:
                    tags = None
                if tags is not None:
                    project_tag = tags.get("wedata.project")
                    datascience_type_tag = tags.get("wedata.datascience.type")

                if project and project_tag != project:
                    print(f"this project:{project},has no {obj_name}")
                    return None
                if datascience_type_tag not in allowed_types:
                    print(
                        "Only ['MACHINE_LEARNING','AUTOML_REGRESSION','AUTOML_CLASSIFICATION','AUTOML_PREDICTION']"
                        " experiment/run/model can be operated in the notebook"
                    )
                    return None
                return obj

            return wrapper

        def validate_wedata_before_operation(func):
            """Refuse to modify objects outside the current project."""

            @wraps(func)
            def wrapper(self, *args, **kwargs):
                project = os.getenv("WEDATA_PROJECT_ID")
                # Without a project there is nothing to validate against.
                if not project:
                    return func(self, *args, **kwargs)
                method_name = func.__name__
                first_positional = args[0] if args else None

                if "experiment" in method_name:
                    id_name = kwargs.get("experiment_id") or first_positional
                    res = self.get_experiment(id_name)
                    if not res:
                        print(
                            f"Experiment: '{id_name}' not exist or does not have permission to operate"
                        )
                        return
                    tags = res.tags
                elif "model" in method_name:
                    id_name = kwargs.get("name") or first_positional
                    res = self.get_registered_model(id_name)
                    if not res:
                        print(
                            f"Model '{id_name}' not exist or does not have permission to operate"
                        )
                        return
                    tags = res.tags
                else:
                    id_name = kwargs.get("run_id") or first_positional
                    res = self.get_run(id_name)
                    if not res:
                        print(
                            f"run: '{id_name}' not exist or does not have permission to operate"
                        )
                        return
                    tags = res.data.tags

                project_tag = tags.get("wedata.project")
                data_science_type = tags.get("wedata.datascience.type")
                if project_tag != project or data_science_type not in allowed_types:
                    print(f"Unauthorized operation:{method_name} ({id_name})")
                    return  # do not perform the operation

                # Tag operations must not touch the built-in wedata.project tag.
                # NOTE(review): "update_tag" / "delete_tags" match no patched
                # method, while set_tag / delete_tag / set_model_version_tag
                # are not listed; the list is kept as in the original
                # (set_model_version_tag is also used internally to write
                # wedata.project).
                if method_name in (
                    "update_tag",
                    "delete_tags",
                    "set_registered_model_tag",
                    "delete_registered_model_tag",
                    "delete_model_version_tag",
                    "set_experiment_tag",
                ):
                    # Look the key up by parameter name: a fixed args[1]
                    # raised IndexError for a single positional argument and
                    # pointed at ``version`` for delete_model_version_tag.
                    bound = bind_call(func, (self,) + args, kwargs)
                    if bound is not None:
                        key_value = bound.arguments.get("key")
                    else:
                        key_value = kwargs.get("key") or (
                            args[1] if len(args) > 1 else None
                        )
                    if key_value == "wedata.project":
                        print(f"No permission to operate protected tags: {key_value}")
                        return
                return func(self, *args, **kwargs)

            return wrapper

        def patch(owner, name, decorator):
            """Replace ``owner.name`` with a single layer of ``decorator``."""
            target = getattr(owner, name)
            # Peel off wrappers installed by an earlier init() call.
            while getattr(target, patch_marker, False):
                target = target.__wrapped__
            wrapped = decorator(target)
            setattr(wrapped, patch_marker, True)
            setattr(owner, name, wrapped)

        patch(Model, "log", inject_model_version_tag)
        patch(TrackingServiceClient, "set_terminated", log_after_terminated)

        # 1. Search calls: add the project tag to filter_string.
        for name in (
            "search_experiments",
            "search_runs",
            "search_registered_models",
            "search_model_versions",
        ):
            patch(MlflowClient, name, inject_project_filter)

        # 2. Create calls: add the project tags to the new object.
        for name in (
            "create_experiment",
            "create_registered_model",
            "create_model_version",
        ):
            patch(MlflowClient, name, inject_project_tag)

        # 3. Get calls: filter the returned object by its wedata.project tag.
        #    get_model_version, get_model_version_download_uri and
        #    get_latest_versions were commented out in the original and stay
        #    unpatched.
        for name in (
            "get_experiment",
            "get_experiment_by_name",
            "get_run",
            "get_parent_run",
            "get_registered_model",
        ):
            patch(MlflowClient, name, validate_wedata_tag)

        # 4. Mutating calls: validate ownership first. They are keyed by
        #    experiment_id, run_id or registered model name respectively.
        for name in (
            "delete_experiment",
            "restore_experiment",
            "rename_experiment",
            "set_experiment_tag",
            "set_tag",
            "delete_tag",
            "update_run",
            "download_artifacts",
            "list_artifacts",
            "delete_run",
            "restore_run",
            "rename_registered_model",
            "update_registered_model",
            "delete_registered_model",
            "update_model_version",
            "delete_model_version",
            "set_model_version_tag",
            "delete_model_version_tag",
            "set_registered_model_alias",
            "delete_registered_model_alias",
            # TODO: tag setters still need a check that the key being set
            # is not wedata.project for every entry point.
            "set_registered_model_tag",
            "delete_registered_model_tag",
        ):
            patch(MlflowClient, name, validate_wedata_before_operation)
|
|
File without changes
|
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from wedata_pre_code.common.base_client import BaseClient
|
|
4
|
+
|
|
5
|
+
__doc__ = """
|
|
6
|
+
Wedata3预执行代码客户端
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
PROXY_HEADER_KEY_IP = "X-Target-Service-IP"
|
|
10
|
+
PROXY_HEADER_KEY_PORT = "X-Target-Service-PORT"
|
|
11
|
+
|
|
12
|
+
FEAST_PROXY_ENV_KEY_IP = "FEAST_SERVICE_IP"
|
|
13
|
+
FEAST_PROXY_ENV_KEY_PORT = "FEAST_SERVICE_PORT"
|
|
14
|
+
|
|
15
|
+
MLFLOW_PROXY_ENV_KEY_IP = "MLFLOW_SERVICE_IP"
|
|
16
|
+
MLFLOW_PROXY_ENV_KEY_PORT = "MLFLOW_SERVICE_PORT"
|
|
17
|
+
|
|
18
|
+
KERNEL_WEDATA_PREFIX = "KERNEL_WEDATA_"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_kernel_env_key(key):
    """Return ``key`` with the Wedata kernel prefix prepended.

    The prefix keeps Wedata-specific variables from colliding with the
    system's built-in environment variables.
    Example: ``key = "REGION"`` yields ``"KERNEL_WEDATA_REGION"``.

    :param key: bare variable name
    :return: the prefixed environment variable name
    """
    prefixed = "".join((KERNEL_WEDATA_PREFIX, key))
    return prefixed
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class Wedata3PreCodeClient(BaseClient):
|
|
32
|
+
"""
|
|
33
|
+
Wedata3预执行代码客户端
|
|
34
|
+
必传参数:
|
|
35
|
+
- workspace_id: 工作空间ID
|
|
36
|
+
- mlflow_tracking_uri: mlflow跟踪URI
|
|
37
|
+
- base_url: 基础URL
|
|
38
|
+
- mlflow_gateway_url: mlflow基础serverless网关地址
|
|
39
|
+
- feast_gateway_url: feast基础serverless网关地址
|
|
40
|
+
- mlflow_proxy_ip: mlflow转发地址
|
|
41
|
+
- mlflow_proxy_port: mlflow转发端口
|
|
42
|
+
- feast_proxy_ip: feast转发地址
|
|
43
|
+
- feast_proxy_port: feast转发端口
|
|
44
|
+
可选参数:
|
|
45
|
+
- region: 区域
|
|
46
|
+
- ap_region_id: 区域ID
|
|
47
|
+
- kernel_task_name: Notebook路径名
|
|
48
|
+
- kernel_task_id: Notebook文件ID
|
|
49
|
+
- kernel_submit_form_workflow: 任务提交表单工作流
|
|
50
|
+
- kernel_is_international: 是否国际站
|
|
51
|
+
- cloud_sdk_secret_id: 云SDK密钥ID
|
|
52
|
+
- cloud_sdk_secret_key: 云SDK密钥KEY
|
|
53
|
+
- cloud_sdk_secret_token: 云SDK密钥TOKEN
|
|
54
|
+
- qcloud_uin: 腾讯云uin
|
|
55
|
+
- qcloud_subuin: 腾讯云subuin
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
workspace_id: str
|
|
59
|
+
base_url: str
|
|
60
|
+
region: Optional[str] = ""
|
|
61
|
+
ap_region_id: Optional[int] = 0
|
|
62
|
+
# 基础serverless网关地址
|
|
63
|
+
mlflow_gateway_url: str
|
|
64
|
+
feast_gateway_url: str
|
|
65
|
+
# mlflow转发地址
|
|
66
|
+
mlflow_proxy_ip: str
|
|
67
|
+
mlflow_proxy_port: str
|
|
68
|
+
# feast转发地址
|
|
69
|
+
feast_proxy_ip: str
|
|
70
|
+
feast_proxy_port: str
|
|
71
|
+
# 系统内置变量
|
|
72
|
+
kernel_task_name: Optional[str]
|
|
73
|
+
kernel_task_id: Optional[str]
|
|
74
|
+
kernel_submit_form_workflow: Optional[str] = ""
|
|
75
|
+
# SDK相关
|
|
76
|
+
cloud_sdk_secret_id: Optional[str] = ""
|
|
77
|
+
cloud_sdk_secret_key: Optional[str] = ""
|
|
78
|
+
cloud_sdk_secret_token: Optional[str] = ""
|
|
79
|
+
# 帐号相关
|
|
80
|
+
qcloud_uin: Optional[str] = ""
|
|
81
|
+
qcloud_subuin: Optional[str] = ""
|
|
82
|
+
|
|
83
|
+
def init(self):
|
|
84
|
+
import inspect
|
|
85
|
+
import json
|
|
86
|
+
import os
|
|
87
|
+
from functools import wraps
|
|
88
|
+
|
|
89
|
+
import feast
|
|
90
|
+
import feast.infra.registry.remote
|
|
91
|
+
import grpc
|
|
92
|
+
import mlflow
|
|
93
|
+
from feast.errors import FeastError
|
|
94
|
+
from feast.protos.feast.registry import RegistryServer_pb2_grpc
|
|
95
|
+
from mlflow.models.model import Model
|
|
96
|
+
from mlflow.tracking import MlflowClient
|
|
97
|
+
from mlflow.tracking._tracking_service.client import TrackingServiceClient
|
|
98
|
+
|
|
99
|
+
mlflow_tracking_uri = f"http://{self.mlflow_proxy_ip}:{self.mlflow_proxy_port}"
|
|
100
|
+
feast_remote_address = f"{self.feast_proxy_ip}:{self.feast_proxy_port}"
|
|
101
|
+
|
|
102
|
+
if self.mlflow_gateway_url:
|
|
103
|
+
mlflow_tracking_uri = f"http://{self.mlflow_gateway_url}"
|
|
104
|
+
os.environ[MLFLOW_PROXY_ENV_KEY_IP] = self.mlflow_proxy_ip
|
|
105
|
+
os.environ[MLFLOW_PROXY_ENV_KEY_PORT] = self.mlflow_proxy_port
|
|
106
|
+
|
|
107
|
+
if self.feast_gateway_url:
|
|
108
|
+
feast_remote_address = self.feast_gateway_url
|
|
109
|
+
os.environ[FEAST_PROXY_ENV_KEY_IP] = self.feast_proxy_ip
|
|
110
|
+
os.environ[FEAST_PROXY_ENV_KEY_PORT] = self.feast_proxy_port
|
|
111
|
+
|
|
112
|
+
# os.environ["MLFLOW_RUN_CONTEXT"] = self.run_context_data
|
|
113
|
+
os.environ["WEDATA_WORKSPACE_ID"] = self.workspace_id
|
|
114
|
+
os.environ["MLFLOW_TRACKING_URI"] = mlflow_tracking_uri
|
|
115
|
+
os.environ[get_kernel_env_key("REGION")] = self.region
|
|
116
|
+
|
|
117
|
+
os.environ["KERNEL_FEAST_REMOTE_ADDRESS"] = feast_remote_address
|
|
118
|
+
|
|
119
|
+
# 设置系统内置变量
|
|
120
|
+
os.environ[get_kernel_env_key("TASK_NAME")] = self.kernel_task_name
|
|
121
|
+
os.environ[get_kernel_env_key("TASK_ID")] = self.kernel_task_id
|
|
122
|
+
os.environ[get_kernel_env_key("SUBMIT_FORM_WORKFLOW")] = self.kernel_submit_form_workflow
|
|
123
|
+
os.environ[get_kernel_env_key("CLOUD_SDK_SECRET_ID")] = self.cloud_sdk_secret_id
|
|
124
|
+
os.environ[get_kernel_env_key("CLOUD_SDK_SECRET_KEY")] = self.cloud_sdk_secret_key
|
|
125
|
+
os.environ[get_kernel_env_key("CLOUD_SDK_SECRET_TOKEN")] = self.cloud_sdk_secret_token
|
|
126
|
+
os.environ[get_kernel_env_key("QCLOUD_UIN")] = self.qcloud_uin
|
|
127
|
+
os.environ[get_kernel_env_key("QCLOUD_SUBUIN")] = self.qcloud_subuin
|
|
128
|
+
|
|
129
|
+
mlflow.set_tracking_uri(mlflow_tracking_uri)
|
|
130
|
+
|
|
131
|
+
if not os.environ.get("MLFLOW_RUN_CONTEXT"):
|
|
132
|
+
# 避免重复设置
|
|
133
|
+
run_context_data = {
|
|
134
|
+
"mlflow.source.name": self.kernel_task_name,
|
|
135
|
+
"mlflow.user": self.qcloud_uin,
|
|
136
|
+
"wedata.taskId": self.kernel_task_id,
|
|
137
|
+
"wedata.workflowId": self.kernel_submit_form_workflow,
|
|
138
|
+
"wedata.datascience.type": "MACHINE_LEARNING",
|
|
139
|
+
"wedata.workspace": self.workspace_id,
|
|
140
|
+
}
|
|
141
|
+
run_context_value = json.dumps(run_context_data, indent=None)
|
|
142
|
+
|
|
143
|
+
os.environ["MLFLOW_RUN_CONTEXT"] = run_context_value
|
|
144
|
+
|
|
145
|
+
if self.region:
|
|
146
|
+
# 日志输出装饰器
|
|
147
|
+
base_url = self.base_url
|
|
148
|
+
workspace_id = self.workspace_id
|
|
149
|
+
ap_region_id = self.ap_region_id
|
|
150
|
+
|
|
151
|
+
def log_after_terminated(func):
|
|
152
|
+
@wraps(func)
|
|
153
|
+
def wrapper(self, run_id, *args, **kwargs):
|
|
154
|
+
print("wedata log_after_terminated wrapper")
|
|
155
|
+
result = func(self, run_id, *args, **kwargs)
|
|
156
|
+
run_info = self.store.get_run(run_id).info
|
|
157
|
+
run_name = run_info.run_name
|
|
158
|
+
experiment_id = run_info.experiment_id
|
|
159
|
+
experiment_url = f"${base_url}/datascience/experiments/experiments-single/{experiment_id}?o=${workspace_id}&r={ap_region_id}"
|
|
160
|
+
run_url = f"${base_url}/datascience/experiments/task-detail-learn/{run_id}?o=${workspace_id}&r={ap_region_id}"
|
|
161
|
+
print(f"View run {run_name} at :{run_url}")
|
|
162
|
+
print(f"View experiment at:{experiment_url}")
|
|
163
|
+
return result
|
|
164
|
+
|
|
165
|
+
return wrapper
|
|
166
|
+
|
|
167
|
+
TrackingServiceClient.set_terminated = log_after_terminated(TrackingServiceClient.set_terminated)
|
|
168
|
+
|
|
169
|
+
# 模型版本标签注入装饰器
|
|
170
|
+
def inject_model_version_tag(func):
|
|
171
|
+
@wraps(func)
|
|
172
|
+
def wrapper(*args, **kwargs):
|
|
173
|
+
print("wedata inject_model_version_tag wrapper")
|
|
174
|
+
registered_model_name = kwargs.get("registered_model_name")
|
|
175
|
+
if registered_model_name is None:
|
|
176
|
+
sig = inspect.signature(func)
|
|
177
|
+
params = list(sig.parameters.keys())
|
|
178
|
+
if "registered_model_name" in params:
|
|
179
|
+
idx = params.index("registered_model_name") - 1
|
|
180
|
+
if len(args) > idx:
|
|
181
|
+
registered_model_name = args[idx]
|
|
182
|
+
result = func(*args, **kwargs)
|
|
183
|
+
model_version = result.registered_model_version
|
|
184
|
+
if registered_model_name and model_version:
|
|
185
|
+
from mlflow import MlflowClient
|
|
186
|
+
|
|
187
|
+
MlflowClient().set_model_version_tag(registered_model_name, model_version, "mlflow.user", "${uin}")
|
|
188
|
+
MlflowClient().set_model_version_tag(
|
|
189
|
+
registered_model_name,
|
|
190
|
+
model_version,
|
|
191
|
+
"wedata.workspace",
|
|
192
|
+
"${workspaceId}",
|
|
193
|
+
)
|
|
194
|
+
MlflowClient().set_model_version_tag(
|
|
195
|
+
registered_model_name,
|
|
196
|
+
model_version,
|
|
197
|
+
"wedata.datascience.type",
|
|
198
|
+
"MACHINE_LEARNING",
|
|
199
|
+
)
|
|
200
|
+
return result
|
|
201
|
+
|
|
202
|
+
return wrapper
|
|
203
|
+
|
|
204
|
+
Model.log = inject_model_version_tag(Model.log)
|
|
205
|
+
|
|
206
|
+
# 项目标签注入装饰器
|
|
207
|
+
def inject_workspace_tag(func):
|
|
208
|
+
@wraps(func)
|
|
209
|
+
def wrapper(self, *args, **kwargs):
|
|
210
|
+
workspace = os.getenv("WEDATA_WORKSPACE_ID")
|
|
211
|
+
args_list = list(args)
|
|
212
|
+
if workspace:
|
|
213
|
+
if "tags" in kwargs:
|
|
214
|
+
tags = kwargs["tags"] or {}
|
|
215
|
+
tags = tags.copy()
|
|
216
|
+
# 如果传入的参数中有wedata.workspace和wedata.datascience.type,则不进行注入
|
|
217
|
+
if "wedata.workspace" not in tags:
|
|
218
|
+
tags["wedata.workspace"] = workspace
|
|
219
|
+
if "wedata.datascience.type" not in tags:
|
|
220
|
+
tags["wedata.datascience.type"] = "MACHINE_LEARNING"
|
|
221
|
+
kwargs["tags"] = tags
|
|
222
|
+
else:
|
|
223
|
+
current_tags = None
|
|
224
|
+
method_name = func.__name__
|
|
225
|
+
if current_tags is None:
|
|
226
|
+
if method_name in ("create_experiment", "create_run"):
|
|
227
|
+
if len(args_list) >= 3:
|
|
228
|
+
current_tags = args_list[2]
|
|
229
|
+
elif method_name in ("create_registered_model"):
|
|
230
|
+
if len(args_list) >= 2:
|
|
231
|
+
current_tags = args_list[1]
|
|
232
|
+
elif method_name in ("create_model_version"):
|
|
233
|
+
if len(args_list) >= 5:
|
|
234
|
+
current_tags = args_list[4]
|
|
235
|
+
if current_tags is None:
|
|
236
|
+
current_tags = {}
|
|
237
|
+
else:
|
|
238
|
+
current_tags = current_tags.copy()
|
|
239
|
+
current_tags["wedata.workspace"] = workspace
|
|
240
|
+
current_tags["wedata.datascience.type"] = "MACHINE_LEARNING"
|
|
241
|
+
current_tags["mlflow.user"] = "${uin}"
|
|
242
|
+
kwargs["tags"] = current_tags
|
|
243
|
+
return func(self, *args, **kwargs)
|
|
244
|
+
|
|
245
|
+
return wrapper
|
|
246
|
+
|
|
247
|
+
# 标签验证装饰器
|
|
248
|
+
def validate_wedata_tag(func):
|
|
249
|
+
@wraps(func)
|
|
250
|
+
def wrapper(*args, **kwargs):
|
|
251
|
+
workspace = os.getenv("WEDATA_WORKSPACE_ID")
|
|
252
|
+
obj = func(*args, **kwargs)
|
|
253
|
+
if obj is None:
|
|
254
|
+
return obj
|
|
255
|
+
workspace_tag = None
|
|
256
|
+
datascience_type_tag = None
|
|
257
|
+
method_name = func.__name__
|
|
258
|
+
obj_name = "object"
|
|
259
|
+
if "run" in method_name:
|
|
260
|
+
workspace_tag = obj.data.tags.get("wedata.workspace")
|
|
261
|
+
datascience_type_tag = obj.data.tags.get("wedata.datascience.type")
|
|
262
|
+
obj_name = "run"
|
|
263
|
+
elif "experiment" in method_name:
|
|
264
|
+
obj_name = "experiment"
|
|
265
|
+
workspace_tag = obj.tags.get("wedata.workspace")
|
|
266
|
+
datascience_type_tag = obj.tags.get("wedata.datascience.type")
|
|
267
|
+
elif "model" in method_name:
|
|
268
|
+
obj_name = "model"
|
|
269
|
+
workspace_tag = obj.tags.get("wedata.workspace")
|
|
270
|
+
datascience_type_tag = obj.tags.get("wedata.datascience.type")
|
|
271
|
+
if workspace and workspace_tag != workspace:
|
|
272
|
+
print(f"this workspace:{workspace},has no {obj_name}")
|
|
273
|
+
return None
|
|
274
|
+
if datascience_type_tag not in ("MACHINE_LEARNING", "DEEP_LEARNING"):
|
|
275
|
+
print("Only MACHINE_LEARNING and DEEP_LEARNING experiment/run/model can be operated in the notebook")
|
|
276
|
+
return None
|
|
277
|
+
return obj
|
|
278
|
+
|
|
279
|
+
return wrapper
|
|
280
|
+
|
|
281
|
+
# 操作前验证装饰器
|
|
282
|
+
def validate_wedata_before_operation(func):
    """Wrap a mutating client method with a workspace/ownership check.

    When ``WEDATA_WORKSPACE_ID`` is unset or empty the wrapped method is
    called directly. Otherwise the target object is fetched first, chosen by
    the wrapped method's ``__name__``:

    * name contains ``experiment`` -> ``self.get_experiment(experiment_id)``
    * name contains ``model``      -> ``self.get_registered_model(name)``
    * anything else                -> ``self.get_run(run_id)``

    The identifier is taken from the matching keyword argument, falling back
    to the first positional argument. The call is refused (a message is
    printed and ``None`` is returned) when the object cannot be fetched, when
    its ``wedata.workspace`` tag differs from the current workspace, when its
    ``wedata.datascience.type`` tag is not ``MACHINE_LEARNING`` or
    ``DEEP_LEARNING``, or when a tag method targets a protected tag key.

    :param func: the method to wrap; called as ``func(self, *args, **kwargs)``.
    :return: the guarding wrapper.
    """
    # Positional index of the tag key (``self`` excluded) for each tag method.
    # The model-version methods take ``(name, version, key, ...)``, so their
    # key sits one position later than for the other tag methods.
    tag_key_index = {
        "set_tag": 1,
        "delete_tag": 1,
        "set_experiment_tag": 1,
        "set_registered_model_tag": 1,
        "delete_registered_model_tag": 1,
        "set_model_version_tag": 2,
        "delete_model_version_tag": 2,
        # Names listed by the previous implementation; kept so nothing that
        # was guarded before loses its guard.
        "update_tag": 1,
        "delete_tags": 1,
    }
    protected_tags = ("wedata.workspace",)

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        workspace = os.getenv("WEDATA_WORKSPACE_ID")
        if not workspace:
            return func(self, *args, **kwargs)

        method_name = func.__name__
        first_positional = args[0] if args else None

        if "experiment" in method_name:
            id_name = kwargs.get("experiment_id") or first_positional
            res = self.get_experiment(id_name)
            if not res:
                print(f"Experiment: '{id_name}' not exist or does not have permission to operate")
                return None
            tags = res.tags
        elif "model" in method_name:
            id_name = kwargs.get("name") or first_positional
            res = self.get_registered_model(id_name)
            if not res:
                print(f"Model '{id_name}' not exist or does not have permission to operate")
                return None
            tags = res.tags
        else:
            id_name = kwargs.get("run_id") or first_positional
            res = self.get_run(id_name)
            if not res:
                print(f"run: '{id_name}' not exist or does not have permission to operate")
                return None
            tags = res.data.tags

        workspace_tag = tags.get("wedata.workspace")
        data_science_type = tags.get("wedata.datascience.type")
        if workspace_tag != workspace or data_science_type not in (
            "MACHINE_LEARNING",
            "DEEP_LEARNING",
        ):
            print(f"Unauthorized operation:{method_name} ({id_name})")
            return None

        key_index = tag_key_index.get(method_name)
        if key_index is not None:
            # Bounds-checked so a short positional list falls through to the
            # wrapped method's own argument error instead of an IndexError here.
            positional_key = args[key_index] if len(args) > key_index else None
            key_value = kwargs.get("key") or positional_key
            if key_value in protected_tags:
                print(f"No permission to operate protected tags: {key_value}")
                return None

        return func(self, *args, **kwargs)

    return wrapper
|
|
338
|
+
|
|
339
|
+
# Apply the decorators: each listed MlflowClient method is replaced by its
# wrapped version, in the same order as before.

# Creation methods get the workspace tags injected.
for _method in (
    "create_experiment",
    "create_registered_model",
    "create_model_version",
):
    setattr(MlflowClient, _method, inject_workspace_tag(getattr(MlflowClient, _method)))

# Lookup methods have their results screened by tag.
for _method in (
    "get_experiment",
    "get_experiment_by_name",
    "get_run",
    "get_parent_run",
    "get_registered_model",
):
    setattr(MlflowClient, _method, validate_wedata_tag(getattr(MlflowClient, _method)))

# Mutating / artifact methods are checked against the target object first.
for _method in (
    "delete_experiment",
    "restore_experiment",
    "rename_experiment",
    "set_experiment_tag",
    "set_tag",
    "delete_tag",
    "update_run",
    "download_artifacts",
    "list_artifacts",
    "delete_run",
    "restore_run",
    "rename_registered_model",
    "update_registered_model",
    "delete_registered_model",
    "update_model_version",
    "delete_model_version",
    "set_model_version_tag",
    "delete_model_version_tag",
    "set_registered_model_alias",
    "delete_registered_model_alias",
    "set_registered_model_tag",
    "delete_registered_model_tag",
):
    setattr(MlflowClient, _method, validate_wedata_before_operation(getattr(MlflowClient, _method)))
|
|
370
|
+
|
|
371
|
+
# Keep a reference to feast's RemoteRegistry as it is at this point, so it can
# still be constructed after the module attribute is reassigned further down.
_original_remote_registry = feast.infra.registry.remote.RemoteRegistry
|
|
372
|
+
|
|
373
|
+
def add_feast_proxy_header():
    """Build a drop-in factory for feast's ``RemoteRegistry``.

    :return: a callable that constructs the saved original registry with the
        arguments it is given, wraps the registry's gRPC channel with a
        ``GrpcClientProxyHeaderInterceptor`` and rebuilds the registry stub
        on the wrapped channel before returning the registry.
    """

    def func(*args, **kwargs):
        remote = _original_remote_registry(*args, **kwargs)
        remote.channel = grpc.intercept_channel(remote.channel, GrpcClientProxyHeaderInterceptor())
        # The stub must be recreated so that it uses the intercepted channel.
        remote.stub = RegistryServer_pb2_grpc.RegistryServerStub(remote.channel)
        return remote

    return func
|
|
382
|
+
|
|
383
|
+
class GrpcClientProxyHeaderInterceptor(
    grpc.UnaryUnaryClientInterceptor,
    grpc.UnaryStreamClientInterceptor,
    grpc.StreamUnaryClientInterceptor,
    grpc.StreamStreamClientInterceptor,
):
    """gRPC client interceptor that adds proxy IP/port headers to every call.

    The header values are read once, at construction time, from the
    environment variables named by ``FEAST_PROXY_ENV_KEY_IP`` and
    ``FEAST_PROXY_ENV_KEY_PORT``.
    """

    def __init__(self):
        """Read the proxy address from the environment.

        :raises FeastError: if either environment variable is unset or empty.
        """
        self.proxy_ip = os.environ.get(FEAST_PROXY_ENV_KEY_IP)
        self.proxy_port = os.environ.get(FEAST_PROXY_ENV_KEY_PORT)
        if not self.proxy_ip:
            raise FeastError(f"Environment variable `{FEAST_PROXY_ENV_KEY_IP}` is not set")
        if not self.proxy_port:
            raise FeastError(f"Environment variable `{FEAST_PROXY_ENV_KEY_PORT}` is not set")

    def intercept_unary_unary(self, continuation, client_call_details, request_iterator):
        return self._handle_call(continuation, client_call_details, request_iterator)

    def intercept_unary_stream(self, continuation, client_call_details, request_iterator):
        return self._handle_call(continuation, client_call_details, request_iterator)

    def intercept_stream_unary(self, continuation, client_call_details, request_iterator):
        return self._handle_call(continuation, client_call_details, request_iterator)

    def intercept_stream_stream(self, continuation, client_call_details, request_iterator):
        return self._handle_call(continuation, client_call_details, request_iterator)

    def _handle_call(self, continuation, client_call_details, request_iterator):
        """Add the proxy headers, run the call and map a failure to a FeastError.

        :return: whatever ``continuation`` returns.
        :raises FeastError: when the call failed and
            ``FeastError.from_error_detail`` recognises the error details.
        """
        client_call_details = self._append_proxy_header_metadata(client_call_details)
        result = continuation(client_call_details, request_iterator)
        # NOTE(review): ``exception()`` is invoked for all four call kinds,
        # streaming included - confirm the streaming call objects support it.
        error = result.exception()
        if error is not None:
            mapped_error = FeastError.from_error_detail(error.details())
            if mapped_error is not None:
                raise mapped_error
        return result

    def _append_proxy_header_metadata(self, client_call_details):
        """Return call details whose metadata additionally carries the proxy headers."""
        # Copy into a fresh list: the incoming metadata may be a tuple (which
        # has no ``append``) or a list owned by the caller, which must not be
        # grown in place.
        metadata = list(client_call_details.metadata or ())
        metadata.append((PROXY_HEADER_KEY_IP.lower(), self.proxy_ip))
        metadata.append((PROXY_HEADER_KEY_PORT.lower(), self.proxy_port))
        return client_call_details._replace(metadata=metadata)
|
|
424
|
+
|
|
425
|
+
# Replace feast's RemoteRegistry with the factory returned by
# add_feast_proxy_header(), so later lookups of this attribute get the factory.
feast.infra.registry.remote.RemoteRegistry = add_feast_proxy_header()
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: wedata-pre-code
|
|
3
|
+
Version: 1.0.8
|
|
4
|
+
Summary: WeData平台的预执行代码库,为机器学习实验提供与MLflow的深度集成
|
|
5
|
+
Author: WeData Team
|
|
6
|
+
Author-email: WeData Team <wedata@tencent.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
17
|
+
Requires-Dist: feast==0.49.0
|
|
18
|
+
Requires-Dist: grpcio>=1.71.0
|
|
19
|
+
Requires-Dist: mlflow>=2.0.0
|
|
20
|
+
Requires-Dist: pydantic>=2.10.6
|
|
21
|
+
Requires-Dist: mlflow>=2.0.0,<3.0.0 ; extra == 'mlflow-v2'
|
|
22
|
+
Requires-Dist: wedata-mlflow-header-plugin>=0.1.2 ; extra == 'wedata-3'
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Project-URL: Documentation, https://wedata.tencent.com/docs
|
|
25
|
+
Project-URL: Homepage, https://wedata.tencent.com
|
|
26
|
+
Provides-Extra: mlflow-v2
|
|
27
|
+
Provides-Extra: wedata-3
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# WeData Pre-Code Library
|
|
31
|
+
|
|
32
|
+
WeData平台的预执行代码库,为机器学习实验提供与MLflow的深度集成和WeData平台的功能增强。
|
|
33
|
+
|
|
34
|
+
## 项目概述
|
|
35
|
+
|
|
36
|
+
本项目提供了两个版本的WeData客户端,用于在WeData平台上运行机器学习实验时提供以下功能:
|
|
37
|
+
|
|
38
|
+
- **MLflow集成增强**:自动注入WeData平台特定的标签和过滤条件
|
|
39
|
+
- **权限控制**:基于项目/工作空间的权限验证机制
|
|
40
|
+
- **URL生成**:自动生成实验和运行的查看链接
|
|
41
|
+
- **环境配置**:自动设置运行环境变量
|
|
42
|
+
|
|
43
|
+
## 版本说明
|
|
44
|
+
|
|
45
|
+
### Wedata2PreCodeClient (WeData 2.0版本)
|
|
46
|
+
|
|
47
|
+
适用于WeData 2.0平台的客户端,主要特性:
|
|
48
|
+
|
|
49
|
+
- 基于项目ID进行权限控制
|
|
50
|
+
- 支持国内站和国际站URL模板
|
|
51
|
+
- 自动注入项目标签和机器学习类型标签
|
|
52
|
+
- 提供完整的MLflow客户端装饰器
|
|
53
|
+
|
|
54
|
+
### Wedata3PreCodeClient (WeData 3.0版本)
|
|
55
|
+
|
|
56
|
+
适用于WeData 3.0平台的客户端,主要特性:
|
|
57
|
+
|
|
58
|
+
- 基于工作空间ID进行权限控制
|
|
59
|
+
- 支持更灵活的配置选项
|
|
60
|
+
- 增强的标签注入和验证机制
|
|
61
|
+
- 支持机器学习和深度学习两种实验类型
|
|
62
|
+
|
|
63
|
+
## 安装和使用
|
|
64
|
+
|
|
65
|
+
### 安装依赖
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
pip install mlflow
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### 使用Wedata2PreCodeClient
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from wedata_pre_code.wedata2.client import Wedata2PreCodeClient
|
|
75
|
+
|
|
76
|
+
# 初始化客户端
|
|
77
|
+
client = Wedata2PreCodeClient(
|
|
78
|
+
wedata_project_id="{{WEDATA_PROJECT_ID}}",
|
|
79
|
+
wedata_notebook_engine="{{WEDATA_NOTEBOOK_ENGINE}}",
|
|
80
|
+
qcloud_uin="{{QCLOUD_UIN}}",
|
|
81
|
+
qcloud_subuin="{{QCLOUD_SUBUIN}}",
|
|
82
|
+
wedata_default_feature_store_database="{{WEDATA_DEFAULT_FEATURE_STORE_DATABASE}}",
|
|
83
|
+
wedata_feature_store_databases="{{WEDATA_FEATURE_STORE_DATABASES}}",
|
|
84
|
+
qcloud_region="{{QCLOUD_REGION}}",
|
|
85
|
+
mlflow_tracking_uri="{{KERNEL_MLFLOW_TRACKING_URI}}",
|
|
86
|
+
feast_remote_address="{{KERNEL_FEAST_REMOTE_ADDRESS}}",
|
|
87
|
+
kernel_submit_form_workflow="{{KERNEL_SUBMIT_FORM_WORKFLOW}}",
|
|
88
|
+
kernel_task_name="{{KERNEL_TASK_NAME}}",
|
|
89
|
+
kernel_task_id="{{KERNEL_TASK_ID}}",
|
|
90
|
+
kernel_region="ap-chongqing",
|
|
91
|
+
kernel_is_international="{{KERNEL_IS_INTERNATIONAL}}".lower() == "true",  # 注意:bool("false") 恒为 True,需显式比较字符串
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
# 执行初始化(与 PreCodeClient.init_wedata2_pre_code 的做法一致)
client.init()

# 现在可以使用MLflow客户端,会自动应用WeData的增强功能
|
|
95
|
+
import mlflow
|
|
96
|
+
mlflow.start_run()
|
|
97
|
+
# ... 你的实验代码
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### 使用Wedata3PreCodeClient
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
%pip install wedata-pre-code
|
|
104
|
+
from wedata_pre_code.wedata3.client import Wedata3PreCodeClient
|
|
105
|
+
|
|
106
|
+
# 初始化客户端
|
|
107
|
+
client = Wedata3PreCodeClient(
|
|
108
|
+
workspace_id="{{WorkspaceID}}",
|
|
109
|
+
base_url="{{BaseUrl}}",
|
|
110
|
+
region="{{Region}}",
|
|
111
|
+
ap_region_id=int("{{RegionId}}"),
|
|
112
|
+
mlflow_gateway_url="{{MlflowGatewayUrl}}",
|
|
113
|
+
feast_gateway_url="{{FeastGatewayUrl}}",
|
|
114
|
+
mlflow_proxy_ip="{{MlflowProxyIp}}",
|
|
115
|
+
mlflow_proxy_port="{{MlflowProxyPort}}",
|
|
116
|
+
feast_proxy_ip="{{FeastProxyIp}}",
|
|
117
|
+
feast_proxy_port="{{FeastProxyPort}}",
|
|
118
|
+
kernel_task_name="{{TaskName}}",
|
|
119
|
+
kernel_task_id="{{TaskId}}",
|
|
120
|
+
kernel_submit_form_workflow="{{SubmitFormWorkflow}}",
|
|
121
|
+
cloud_sdk_secret_id="{{CloudSdkSecretId}}",
|
|
122
|
+
cloud_sdk_secret_key="{{CloudSdkSecretKey}}",
|
|
123
|
+
cloud_sdk_secret_token="{{CloudSdkSecretToken}}",
|
|
124
|
+
qcloud_uin="{{QcloudUin}}",
|
|
125
|
+
qcloud_subuin="{{QcloudSubUin}}",
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
client.init()
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
* 必传参数:
|
|
132
|
+
- workspace_id: 工作空间ID
|
|
133
|
+
- mlflow_tracking_uri: mlflow跟踪URI
|
|
134
|
+
- base_url: 基础URL
|
|
135
|
+
- mlflow_gateway_url: mlflow基础serverless网关地址
|
|
136
|
+
- feast_gateway_url: feast基础serverless网关地址
|
|
137
|
+
- mlflow_proxy_ip: mlflow转发地址
|
|
138
|
+
- mlflow_proxy_port: mlflow转发端口
|
|
139
|
+
- feast_proxy_ip: feast转发地址
|
|
140
|
+
- feast_proxy_port: feast转发端口
|
|
141
|
+
* 可选参数:
|
|
142
|
+
- region: 区域
|
|
143
|
+
- ap_region_id: 区域ID
|
|
144
|
+
- kernel_task_name: Notebook路径名
|
|
145
|
+
- kernel_task_id: Notebook文件ID
|
|
146
|
+
- kernel_submit_form_workflow: 任务提交表单工作流
|
|
147
|
+
- kernel_is_international: 是否国际站
|
|
148
|
+
- cloud_sdk_secret_id: 云SDK密钥ID
|
|
149
|
+
- cloud_sdk_secret_key: 云SDK密钥KEY
|
|
150
|
+
- cloud_sdk_secret_token: 云SDK密钥TOKEN
|
|
151
|
+
- qcloud_uin: 腾讯云uin
|
|
152
|
+
- qcloud_subuin: 腾讯云subuin
|
|
153
|
+
-
|
|
154
|
+
## 功能特性
|
|
155
|
+
|
|
156
|
+
### 自动标签注入
|
|
157
|
+
|
|
158
|
+
- 自动为实验、运行和模型注入WeData平台标签
|
|
159
|
+
- 包括项目ID、工作空间ID、机器学习类型等信息
|
|
160
|
+
- 确保数据在平台上的可追溯性
|
|
161
|
+
|
|
162
|
+
### 权限验证
|
|
163
|
+
|
|
164
|
+
- 在执行敏感操作前验证权限
|
|
165
|
+
- 防止跨项目/工作空间的未授权操作
|
|
166
|
+
- 保护内置标签不被修改
|
|
167
|
+
|
|
168
|
+
### URL生成
|
|
169
|
+
|
|
170
|
+
- 自动生成实验和运行的查看URL
|
|
171
|
+
- 在运行终止时显示访问链接
|
|
172
|
+
- 方便用户快速访问实验结果
|
|
173
|
+
|
|
174
|
+
### 环境配置
|
|
175
|
+
|
|
176
|
+
- 自动设置MLflow跟踪URI
|
|
177
|
+
- 配置运行上下文环境变量
|
|
178
|
+
- 支持国际站和国内站的不同配置
|
|
179
|
+
|
|
180
|
+
## 项目结构
|
|
181
|
+
|
|
182
|
+
```
|
|
183
|
+
pre-execute/
|
|
184
|
+
├── src/
|
|
185
|
+
│ └── wedata_pre_code/
|
|
186
|
+
│ ├── __init__.py
|
|
187
|
+
│ ├── client.py # 主客户端入口
|
|
188
|
+
│ ├── common/
|
|
189
|
+
│ │ ├── __init__.py
|
|
190
|
+
│ │ └── base_client.py # 基础客户端类
|
|
191
|
+
│ ├── wedata2/
|
|
192
|
+
│ │ ├── __init__.py
|
|
193
|
+
│ │ └── client.py # WeData 2.0客户端
|
|
194
|
+
│ └── wedata3/
|
|
195
|
+
│ ├── __init__.py
|
|
196
|
+
│ └── client.py # WeData 3.0客户端
|
|
197
|
+
├── docs/ # 文档目录
|
|
198
|
+
├── pyproject.toml # 项目配置
|
|
199
|
+
├── requirement.txt # 依赖文件
|
|
200
|
+
└── README.md # 项目说明
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## 开发指南
|
|
204
|
+
|
|
205
|
+
### 添加新的装饰器
|
|
206
|
+
|
|
207
|
+
要添加新的MLflow客户端方法装饰器,可以参考现有的实现模式:
|
|
208
|
+
|
|
209
|
+
1. 在相应的客户端类中定义装饰器函数
|
|
210
|
+
2. 使用`@wraps`保留原函数属性
|
|
211
|
+
3. 在装饰器内部实现特定的逻辑
|
|
212
|
+
4. 将装饰器应用到目标MLflow方法
|
|
213
|
+
|
|
214
|
+
### 测试
|
|
215
|
+
|
|
216
|
+
确保在修改代码后测试以下场景:
|
|
217
|
+
|
|
218
|
+
- 正常创建实验和运行
|
|
219
|
+
- 权限验证功能
|
|
220
|
+
- 标签注入的正确性
|
|
221
|
+
- URL生成的准确性
|
|
222
|
+
|
|
223
|
+
## 注意事项
|
|
224
|
+
|
|
225
|
+
- 确保MLflow服务器配置正确
|
|
226
|
+
- 验证环境变量设置完整
|
|
227
|
+
- 注意不同版本客户端的参数差异
|
|
228
|
+
- 在生产环境使用前进行充分测试
|
|
229
|
+
|
|
230
|
+
## 支持与反馈
|
|
231
|
+
|
|
232
|
+
如有问题或建议,请联系WeData平台技术支持团队。
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
wedata_pre_code/__init__.py,sha256=-p_dX-VDC0sAqqAwuWLLlnzRNAHad1LF7wOFJw2z_uk,240
|
|
2
|
+
wedata_pre_code/client.py,sha256=IrcbswwI206_Qxxc9IzqXYKkGrWB2IDbIyBzZa1ex0E,1111
|
|
3
|
+
wedata_pre_code/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
wedata_pre_code/common/base_client.py,sha256=Y9lQBOYaDOB7Zy3MR_bPvHSQjN5Onsn8TPOzcTmGMEc,224
|
|
5
|
+
wedata_pre_code/wedata2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
wedata_pre_code/wedata2/client.py,sha256=ScAQRrT-alc2CqatiwLCNAymYfEtWKWTnrCsLTbYEoM,19535
|
|
7
|
+
wedata_pre_code/wedata3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
wedata_pre_code/wedata3/client.py,sha256=CKuuHHU9JZXQeLORtPU3xXSAXzynBFN6jec8UBvrC_I,21228
|
|
9
|
+
wedata_pre_code-1.0.8.dist-info/WHEEL,sha256=eycQt0QpYmJMLKpE3X9iDk8R04v2ZF0x82ogq-zP6bQ,79
|
|
10
|
+
wedata_pre_code-1.0.8.dist-info/METADATA,sha256=3WpiDxxbZigT7G00pLE8nzOJWflPKCXmIMiLYGRGmTQ,7371
|
|
11
|
+
wedata_pre_code-1.0.8.dist-info/RECORD,,
|