tfduck-bsd 0.18.5__tar.gz → 0.18.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tfduck-bsd might be problematic. Click here for more details.
- {tfduck-bsd-0.18.5/tfduck_bsd.egg-info → tfduck-bsd-0.18.7}/PKG-INFO +1 -1
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/setup.py +1 -1
- tfduck-bsd-0.18.7/tfduck/__init__.py +1 -0
- tfduck-bsd-0.18.7/tfduck/serverless_k8s/k8s_manage.py +228 -0
- tfduck-bsd-0.18.7/tfduck/serverless_k8s/k8s_task.py +343 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/tga.py +9 -1
- tfduck-bsd-0.18.7/tfduck/thinkdata/__init__.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7/tfduck_bsd.egg-info}/PKG-INFO +1 -1
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck_bsd.egg-info/SOURCES.txt +3 -0
- tfduck-bsd-0.18.5/tfduck/__init__.py +0 -1
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/LICENSE +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/README.md +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/bin/tfduck +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/setup.cfg +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/bdp_sdk_py/__init__.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/bdp_sdk_py/config/__init__.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/bdp_sdk_py/config/bdpmanager.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/bdp_sdk_py/config/table_config.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/bdp_sdk_py/example.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/bdp_sdk_py/opends/__init__.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/bdp_sdk_py/opends/opends.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/bdp_sdk_py/opends/sdk.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/common/__init__.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/common/defines.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/common/defines_clean.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/common/extendEncoder.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/main.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/oss/__init__.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/oss/oss.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/pyspark_k8s/__init__.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/pyspark_k8s/k8s_manage.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/pyspark_k8s/spark_manage.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/s3/__init__.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/s3/s3oper.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/sagemaker/__init__.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/sagemaker/saoper.py +0 -0
- {tfduck-bsd-0.18.5/tfduck/tga → tfduck-bsd-0.18.7/tfduck/serverless_k8s}/__init__.py +0 -0
- {tfduck-bsd-0.18.5/tfduck/thinkdata → tfduck-bsd-0.18.7/tfduck/tga}/__init__.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/base_tga.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/predict_sql_ltv.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/predict_sql_retain.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/predict_sql_yh.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/tga_test.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/train_sql_ltv.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/train_sql_retain.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/train_sql_yh.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/thinkdata/query.py +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck_bsd.egg-info/dependency_links.txt +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck_bsd.egg-info/requires.txt +0 -0
- {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck_bsd.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__="0.18.7"
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
"""
|
|
2
|
+
author: yuanxiao
|
|
3
|
+
datetime: 20210423
|
|
4
|
+
python3.8
|
|
5
|
+
pyspark 提交阿里云ask集群管理器
|
|
6
|
+
pip install kubernetes==12.0.1 oss2==2.19.1
|
|
7
|
+
|
|
8
|
+
如果报错【CA_KEY_TOO_SMALL】则执行(因为k8s的老的秘钥创建是1024位的):
|
|
9
|
+
参考: https://www.cnblogs.com/dingnosakura/p/17815410.html
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
# Work around the TLS "CA_KEY_TOO_SMALL" error by monkey patching urllib3
# (targets urllib3 2.x): older Kubernetes clusters issued 1024-bit CA keys,
# which OpenSSL rejects at its default security level.
import urllib3
from urllib3.util.ssl_ import create_urllib3_context

# Custom SSL context built on top of urllib3's defaults.
# SECURITY: SECLEVEL=0 re-enables weak keys/ciphers, and because the patch
# below is applied to urllib3.PoolManager itself, it affects EVERY urllib3
# user in this process that does not pass its own ssl_context.
ctx = create_urllib3_context()
ctx.set_ciphers("DEFAULT@SECLEVEL=0")
ctx.load_default_certs()  # make sure the system CA bundle is loaded

# Apply the patch exactly once. The previous guard compared the installed
# __init__ against a function object created on this very execution, so it
# was always true; a module reload then wrapped the patch around itself.
# A marker attribute on the installed function makes the check reliable.
if not getattr(urllib3.PoolManager.__init__, "_tfduck_ssl_patched", False):
    # Keep the original initializer so the patch can be rolled back.
    _original_poolmanager_init = urllib3.PoolManager.__init__

    def _patched_poolmanager_init(self, *args, **kwargs):
        """Inject the relaxed SSL context unless the caller supplied one."""
        kwargs.setdefault("ssl_context", ctx)
        _original_poolmanager_init(self, *args, **kwargs)

    _patched_poolmanager_init._tfduck_ssl_patched = True
    urllib3.PoolManager.__init__ = _patched_poolmanager_init
|
|
34
|
+
|
|
35
|
+
import json
|
|
36
|
+
|
|
37
|
+
# from tfduck.common.defines import BMOBJ, Et
|
|
38
|
+
# from tfduck.oss.oss import AliyunOss
|
|
39
|
+
from kubernetes import client, config, utils
|
|
40
|
+
import uuid
|
|
41
|
+
import arrow
|
|
42
|
+
import os
|
|
43
|
+
import base64
|
|
44
|
+
import subprocess
|
|
45
|
+
from io import BytesIO, StringIO
|
|
46
|
+
import yaml
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class K8sManage(object):
    """Small convenience wrapper around the official ``kubernetes`` client.

    All pod operations are scoped to ``self.namespace``.
    """

    def __init__(
        self, ctx, k8s_conn_yaml=None, k8s_conn_json=None, namespace="default"
    ):
        """Store the connection settings and build the API clients.

        Args:
            ctx: Caller context object; stored but not used by this class.
            k8s_conn_yaml: Path to a kubeconfig YAML file (e.g. downloaded
                from the Alibaba Cloud console). Takes precedence over
                ``k8s_conn_json`` when both are given.
            k8s_conn_json: The same kubeconfig already parsed into a dict,
                e.g. ``list(yaml.safe_load_all(open(path)))[0]``.
            namespace: Namespace every pod operation is scoped to.
        """
        self.ctx = ctx
        self.k8s_conn_config = k8s_conn_yaml
        self.k8s_conn_json = k8s_conn_json
        self.namespace = namespace
        # CoreV1Api for pod operations, raw ApiClient for utils.create_from_*.
        self.k8s_client = self.get_k8s_client(mode="v1")
        self.k8s_api_client = self.get_k8s_client(mode="api")

    def get_k8s_client(self, mode="v1"):
        """Load the kube config and return a client object.

        Examples: https://github.com/kubernetes-client/python/tree/master/examples
        API docs: https://github.com/kubernetes-client/python/blob/master/kubernetes/README.md

        Args:
            mode: ``"v1"`` for a ``CoreV1Api``, ``"api"`` for an ``ApiClient``.

        Raises:
            ValueError: If ``mode`` is not one of the supported values
                (previously this surfaced as an ``UnboundLocalError``).
        """
        if mode not in ("v1", "api"):
            raise ValueError(f"unsupported k8s client mode: {mode!r}")
        if self.k8s_conn_config:
            config.load_kube_config(self.k8s_conn_config)
        elif self.k8s_conn_json:
            config.load_kube_config_from_dict(self.k8s_conn_json)
        # When neither setting is given no config is loaded here and the
        # kubernetes library's current default configuration is used.
        if mode == "v1":
            return client.CoreV1Api()
        return client.ApiClient()

    def get_all_pods(self):
        """Return name, phase and creation time of every pod in the namespace.

        Returns:
            list[dict]: One ``{"name", "status", "create_time"}`` dict per pod.
        """
        # Ask the API server for this namespace only instead of listing the
        # whole cluster and filtering client-side.
        ret = self.k8s_client.list_namespaced_pod(self.namespace, watch=False)
        return [
            {
                "name": pod.metadata.name,
                "status": pod.status.phase,
                "create_time": pod.metadata.creation_timestamp,
            }
            for pod in ret.items
        ]

    def get_pod_info(self, pod_name):
        """Return the full pod object for ``pod_name``.

        The phase is available as ``.status.phase``
        (Pending, Running, Succeeded or Failed).
        """
        return self.k8s_client.read_namespaced_pod(
            name=pod_name, namespace=self.namespace
        )

    def get_pod_log(self, pod_name, lines_count=1000):
        """Return the last ``lines_count`` log lines of ``pod_name``."""
        # tail_lines counts from the end of the log.
        return self.k8s_client.read_namespaced_pod_log(
            pod_name, self.namespace, pretty=True, tail_lines=lines_count
        )

    def create_by_yaml(self, yaml_file):
        """Create the objects described in a YAML file.

        Returns:
            Whatever ``utils.create_from_yaml`` returns (previously the
            result was discarded and ``None`` was returned).
        """
        return utils.create_from_yaml(
            k8s_client=self.k8s_api_client,
            yaml_file=yaml_file,
            namespace=self.namespace,
        )

    def create_by_yaml_str(self, yaml_str):
        """Create the objects described by a (multi-document) YAML string.

        Every document is attempted even if an earlier one fails; the
        failures are collected and raised together at the end.

        Raises:
            utils.FailToCreateError: If any document could not be created.
        """
        failures = []
        k8s_objects = []
        with StringIO(yaml_str) as f:
            for yml_document in yaml.safe_load_all(f):
                if yml_document is None:
                    # Empty document (e.g. a trailing '---'): nothing to do.
                    continue
                try:
                    created = utils.create_from_dict(
                        k8s_client=self.k8s_api_client,
                        data=yml_document,
                        namespace=self.namespace,
                    )
                    k8s_objects.append(created)
                except utils.FailToCreateError as failure:
                    failures.extend(failure.api_exceptions)
        if failures:
            raise utils.FailToCreateError(failures)
        return k8s_objects

    def create_by_json(self, json_obj):
        """Create the object described by an already-parsed manifest dict.

        Raises:
            utils.FailToCreateError: Propagated unchanged from the client.
        """
        return utils.create_from_dict(
            k8s_client=self.k8s_api_client,
            data=json_obj,
            namespace=self.namespace,
        )

    def delete_pod(self, pod_name):
        """Delete ``pod_name`` and return the API response."""
        return self.k8s_client.delete_namespaced_pod(pod_name, self.namespace)
|
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
"""
|
|
2
|
+
author: yuanxiao
|
|
3
|
+
datetime: 20210423
|
|
4
|
+
python3.8
|
|
5
|
+
ask serverless的提交任务
|
|
6
|
+
pip install kubernetes==12.0.1
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
# from tfduck.common.defines import BMOBJ, Et
|
|
11
|
+
|
|
12
|
+
# from tfduck.oss.oss import AliyunOss
|
|
13
|
+
import arrow
|
|
14
|
+
import time
|
|
15
|
+
import os
|
|
16
|
+
import base64
|
|
17
|
+
import subprocess
|
|
18
|
+
import uuid
|
|
19
|
+
|
|
20
|
+
# Import the sibling module in a way that works both when this file is loaded
# as part of the installed package (tfduck.serverless_k8s.k8s_task) and when
# it is run as a script from inside its own directory. The previous bare
# ``from k8s_manage import ...`` only worked in the latter case.
if __package__:
    from .k8s_manage import K8sManage
else:
    from k8s_manage import K8sManage
# The former ``from k8s_upload_oss import AliyunOss`` was dropped: that module
# is not shipped with the package, and AliyunOss is imported from
# tfduck.oss.oss further down in this file.
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# class YxBMOBJ(object):
|
|
26
|
+
# def clog(self, ctx, *args):
|
|
27
|
+
# print(*args)
|
|
28
|
+
# pass
|
|
29
|
+
|
|
30
|
+
# def get_file_path(self, path):
|
|
31
|
+
# """
|
|
32
|
+
# @des: 文件的真实路径
|
|
33
|
+
# """
|
|
34
|
+
# # self.dj44_base_dir = "/Users/yuanxiao/workspace/djcelery44/djcelery44"
|
|
35
|
+
# # return os.path.join(self.dj44_base_dir, os.path.join("dags/sptasks/p_code", path))
|
|
36
|
+
# return os.path.abspath(path)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# BMOBJ = YxBMOBJ()
|
|
40
|
+
|
|
41
|
+
if 1:
|
|
42
|
+
from tfduck.common.defines import BMOBJ, Et
|
|
43
|
+
from tfduck.oss.oss import AliyunOss
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class Et(Exception):
    """Task error carrying a numeric ``code`` and a human-readable ``msg``.

    NOTE(review): this definition shadows the ``Et`` imported from
    ``tfduck.common.defines`` earlier in the file -- confirm that is intended.
    """

    def __init__(self, code, msg):
        # Pass both values on so str()/repr()/args of the exception are
        # populated through the regular Exception machinery.
        super().__init__(code, msg)
        self.code = code
        self.msg = msg
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class ServerlessTaskManage(object):
    """Run a code directory as a one-shot pod on a serverless k8s (ASK) cluster.

    Typical use is ``ServerlessTaskManage(...).sync()``, which uploads the
    code to OSS, creates a pod that installs ``requirements.txt`` and runs
    ``main.py`` from the uploaded directory, and (in synchronous mode) waits
    for the pod to finish.
    """

    # Used when the caller does not pass ``task_config``.
    DEFAULT_TASK_CONFIG = {
        # pod name prefix (required)
        "task_name": "yxtestpodtask",
        # cpu request/limit (required)
        "cpu": "500m",
        # memory request/limit (required)
        "memory": "500Mi",
        # container image (required)
        "image_url": "registry-intl.cn-beijing.aliyuncs.com/talefun/python311:base",
        # name of the OSS-backed PVC; must live in the "duck-task" namespace (required)
        "pvc_name": "tfduck-k8s-pvc",
        # PyPI index URL; empty string means the official index
        "pypi_mirror": "https://pypi.tuna.tsinghua.edu.cn/simple",
    }
    # Seconds between two pod status polls.
    POLL_INTERVAL_SECONDS = 10
    # Maximum number of polls spent waiting for the pod to leave "Pending".
    MAX_START_POLLS = 30
    # Pod phases after which the pod will not change any more.
    TERMINAL_PHASES = ("Succeeded", "Failed")

    def __init__(
        self,
        ctx,
        code_path,
        max_run_time=7200,
        task_config=None,
        is_debug=False,
        project_name="playdayy-bj",
        pull_pod_success_log=True,
        pull_pod_fail_log=True,
        is_async=False,
        project_configs=None,
    ):
        """Validate the task settings and build the k8s client.

        Args:
            ctx: Caller context, passed through to logging and OSS helpers.
            code_path: Path of the code to upload and run.
            max_run_time: Maximum run time of the pod task, in seconds.
            task_config: Pod settings; see ``DEFAULT_TASK_CONFIG`` for the
                keys. ``None`` uses a copy of those defaults.
            is_debug: When true the pod is deleted after a synchronous run.
            project_name: Key into ``project_configs`` selecting the project.
            pull_pod_success_log: Fetch and log the pod log after success.
            pull_pod_fail_log: Fetch and log the pod log after failure.
            is_async: When true ``submit_task`` returns right after the pod
                was created instead of waiting for it to finish.
            project_configs: Mapping of project name to project settings; the
                selected entry must provide ``"oss_pv"`` and
                ``"k8s_serverless_conn"``.

        Raises:
            Et: If the task name is None, blank, or contains ``-``.
            KeyError: If ``project_name`` is not in ``project_configs``.
        """
        # None sentinels instead of mutable default arguments, which would be
        # shared between every instance created without these arguments.
        if task_config is None:
            task_config = dict(self.DEFAULT_TASK_CONFIG)
        if project_configs is None:
            project_configs = {}

        # --- validation ---
        task_sub_name = task_config.get("task_name", "default")
        if task_sub_name is None:
            raise Et(2, "task name must not be none")
        task_sub_name = task_sub_name.strip()
        if task_sub_name == "":
            raise Et(2, "task name must not be '' ")
        if task_sub_name.find("-") != -1:
            raise Et(2, "task name not clude - char")

        # --- project settings ---
        self.project_name = project_name
        self.project_configs = project_configs
        self.project_conf = self.project_configs[self.project_name]

        # --- general state ---
        self.ctx = ctx
        self.is_debug = is_debug
        self.max_run_time = max_run_time
        self.code_path = code_path
        self.pull_pod_success_log = pull_pod_success_log
        self.pull_pod_fail_log = pull_pod_fail_log
        self.task_config = task_config
        self.pod_name = f"{task_sub_name}-{self.max_run_time}-{uuid.uuid4().hex[:8]}"
        self.oss_root_name = "oss_data"
        self.is_async = is_async
        # Set by upload_code(); None means nothing has been uploaded yet.
        self.real_code_path = None

        # --- OSS settings ---
        self.oss_config = self.project_conf["oss_pv"]

        # --- k8s client ---
        self.name_space = "duck-task"
        self.k8s_client = K8sManage(
            ctx,
            k8s_conn_json=self.project_conf["k8s_serverless_conn"],
            namespace=self.name_space,
        )

    def _new_oss_client(self):
        """Build an AliyunOss client from the project's OSS settings."""
        oss_config = self.oss_config
        return AliyunOss(
            oss_config["bucket"],
            oss_config["access_key"],
            oss_config["secret_key"],
            oss_config["endpoint_public"],
        )

    def _refresh_pod(self, last_pod_obj):
        """Re-read the pod; keep the previous snapshot if the read fails."""
        try:
            return self.k8s_client.get_pod_info(self.pod_name)
        except Exception as e:
            # Deliberately best-effort: a failed poll must not abort the
            # wait, but it is logged instead of being swallowed silently.
            BMOBJ.clog(self.ctx, f"get pod info failed, will retry: {e!r}")
            return last_pod_obj

    def upload_code(self):
        """Upload the code directory to OSS and remember its in-pod path.

        Sets ``self.real_code_path`` to the path under the OSS mount
        (``/<oss_root_name>/code/<date>/<uuid>/``).

        Raises:
            Et: If ``code_path`` is empty. (The original call passed a single
                argument to ``Et`` and therefore raised ``TypeError``.)
        """
        if not self.code_path:
            raise Et(2, "code_path不能为空")
        now_str = arrow.utcnow().format("YYYY-MM-DD")
        # The OSS prefix must end with a slash.
        oss_file_path = f"code/{now_str}/{uuid.uuid4().hex}/"
        local_file_path = BMOBJ.get_file_path(self.code_path)
        if not local_file_path.startswith("/"):
            local_file_path = f"/{local_file_path}"
        # NOTE(review): per the original comment isrm/isdel mean "search
        # recursively, do not delete local files" -- confirm against
        # AliyunOss.upload_oss, along with the two positional False flags.
        self._new_oss_client().upload_oss(
            self.ctx,
            local_file_path,
            oss_file_path,
            False,
            False,
            isrm=True,
            isdel=False,
        )
        # Path inside the pod, so always POSIX-style (not os.path.join).
        self.real_code_path = f"/{self.oss_root_name}/{oss_file_path}"

    def clean_code(self):
        """Delete the uploaded code from OSS; no-op if nothing was uploaded."""
        real_code_path = getattr(self, "real_code_path", None)
        if not real_code_path:
            return
        # The OSS key is the in-pod path without the mount-point prefix.
        mount_prefix = f"/{self.oss_root_name}/"
        if real_code_path.startswith(mount_prefix):
            oss_code_path = real_code_path[len(mount_prefix):]
        else:
            oss_code_path = real_code_path
        if not oss_code_path:
            # Never hand an empty prefix to a prefix delete.
            return
        BMOBJ.clog(self.ctx, "clean code path:", oss_code_path)
        self._new_oss_client().delete_prefix_oss(self.ctx, oss_code_path, isrm=True)

    def get_submit_task_cmd(self):
        """Build the pod manifest (as a dict) that runs the uploaded code.

        Returns:
            dict: A ``v1`` ``Pod`` manifest with one container that changes
            into the uploaded code directory, installs the requirements and
            runs ``main.py``.

        Raises:
            Et: If no code has been uploaded yet.
        """
        if not getattr(self, "real_code_path", None):
            raise Et(2, "code must be uploaded before the task can be submitted")

        pypi_mirror = self.task_config.get("pypi_mirror", "")
        if pypi_mirror:
            # Host part of the index URL; also works without a scheme.
            pypi_mirror_host = pypi_mirror.split("://", 1)[-1].split("/", 1)[0]
            pypi_i = f"-i {pypi_mirror} --trusted-host {pypi_mirror_host}"
        else:
            pypi_i = ""

        steps = [
            f"cd {self.real_code_path}",
            "python3 --version",
            f"pip install -U pip {pypi_i}",
            f"pip install arrow {pypi_i}",
            f"pip install -r requirements.txt {pypi_i}",
            "python main.py",
        ]
        shell_cmd = " && ".join(step.strip() for step in steps)

        volume_name = "tfduck-k8s-pv"
        return {
            "apiVersion": "v1",
            "kind": "Pod",
            "metadata": {"name": self.pod_name, "namespace": self.name_space},
            "spec": {
                "containers": [
                    {
                        "name": "python311-container",
                        "image": self.task_config["image_url"],
                        "command": ["/bin/bash", "-c"],
                        "args": [shell_cmd],
                        "resources": {
                            "requests": {
                                "memory": self.task_config["memory"],
                                "cpu": self.task_config["cpu"],
                            },
                            "limits": {
                                "memory": self.task_config["memory"],
                                "cpu": self.task_config["cpu"],
                            },
                        },
                        "volumeMounts": [
                            {
                                "name": volume_name,
                                "mountPath": f"/{self.oss_root_name}",
                            }
                        ],
                    }
                ],
                "volumes": [
                    {
                        "name": volume_name,
                        "persistentVolumeClaim": {
                            "claimName": self.task_config["pvc_name"]
                        },
                    }
                ],
                "restartPolicy": "Never",
            },
        }

    def submit_task(self):
        """Create the pod and, unless asynchronous, wait for it to finish.

        Completion is judged from the pod phase rather than from its log, so
        no large logs have to be pulled while waiting.

        Returns:
            bool: Asynchronous mode: True if the pod is Pending or Running
            right after creation. Synchronous mode: True only if the pod
            reached "Succeeded" within ``max_run_time`` seconds.
        """
        ctx = self.ctx
        self.k8s_client.create_by_json(self.get_submit_task_cmd())
        pod_obj = self.k8s_client.get_pod_info(self.pod_name)

        if self.is_async:
            BMOBJ.clog(ctx, pod_obj.status.phase)
            if pod_obj.status.phase in ["Pending", "Running"]:
                BMOBJ.clog(ctx, "task success runing")
                return True
            BMOBJ.clog(ctx, "task fail")
            return False

        start = time.time()

        # Phase 1: wait for the pod to start. A terminal phase counts as
        # "started" too, otherwise a pod that finishes between two polls
        # would be waited on for the full start timeout and then be
        # reported as failed.
        started_phases = ("Running",) + tuple(self.TERMINAL_PHASES)
        polls = 0
        while (
            pod_obj.status.phase not in started_phases
            and polls < self.MAX_START_POLLS
        ):
            time.sleep(self.POLL_INTERVAL_SECONDS)
            pod_obj = self._refresh_pod(pod_obj)
            polls += 1
        if pod_obj.status.phase not in started_phases:
            BMOBJ.clog(
                ctx,
                f"task fail in {time.time() - start} seconds state: {pod_obj.status.phase}",
            )
            return False

        # Phase 2: wait for a terminal phase, but no longer than
        # max_run_time. (The original condition used "or wait_time >
        # max_run_time", which never timed out and looped forever once the
        # deadline had passed.)
        while (
            pod_obj.status.phase not in self.TERMINAL_PHASES
            and time.time() - start <= self.max_run_time
        ):
            time.sleep(self.POLL_INTERVAL_SECONDS)
            pod_obj = self._refresh_pod(pod_obj)

        phase = pod_obj.status.phase
        BMOBJ.clog(ctx, f"pod status: {phase}")
        if phase == "Succeeded":
            BMOBJ.clog(ctx, "task success complated")
            if self.pull_pod_success_log:
                success_log = self.k8s_client.get_pod_log(self.pod_name, 500)
                BMOBJ.clog(ctx, "pod task success log ----------:", success_log)
            return True

        if phase not in self.TERMINAL_PHASES:
            BMOBJ.clog(ctx, f"task timeout after {self.max_run_time} seconds")
        BMOBJ.clog(ctx, "task fail-------------------error log:")
        if self.pull_pod_fail_log:
            error_log = self.k8s_client.get_pod_log(self.pod_name, 1000)
            BMOBJ.clog(ctx, "pod task fail log ----------:", error_log)
        return False

    def sync(self):
        """Upload the code, run the task and clean up (all in one).

        Returns:
            bool: The result of ``submit_task``.
        """
        try:
            self.upload_code()
            result = self.submit_task()
        finally:
            # In asynchronous mode the pod may still be running, so neither
            # the code nor the pod is touched here.
            if not self.is_async:
                try:
                    self.clean_code()
                except Exception as e:
                    # Best-effort cleanup: report, never mask the real result.
                    BMOBJ.clog(self.ctx, f"clean code failed: {e!r}")
                # Pods are only removed here while debugging; in production a
                # scheduled job is expected to clean them up.
                # NOTE(review): the nesting of this branch under the
                # "not is_async" check was reconstructed -- confirm.
                if self.is_debug:
                    try:
                        self.k8s_client.delete_pod(self.pod_name)
                    except Exception as e:
                        BMOBJ.clog(self.ctx, f"delete pod failed: {e!r}")
        return result
|
|
@@ -196,6 +196,7 @@ class ThinkDataQuery(BaseTga):
|
|
|
196
196
|
retry_count=2,
|
|
197
197
|
conn_timeout=30,
|
|
198
198
|
tga_data_timeout=600,
|
|
199
|
+
retry_wait=1,
|
|
199
200
|
):
|
|
200
201
|
"""
|
|
201
202
|
@des:从thinkdata的openapi获取数据----流式,为了节省内存---配合下面的getquerycsv
|
|
@@ -228,6 +229,7 @@ class ThinkDataQuery(BaseTga):
|
|
|
228
229
|
# else:
|
|
229
230
|
# break
|
|
230
231
|
if 1:
|
|
232
|
+
time.sleep(retry_wait)
|
|
231
233
|
continue
|
|
232
234
|
if gol_e is not None:
|
|
233
235
|
raise gol_e
|
|
@@ -322,7 +324,9 @@ class ThinkDataQuery(BaseTga):
|
|
|
322
324
|
BMOBJ.clog(ctx, i)
|
|
323
325
|
BMOBJ.clog(ctx, f"total: {i}")
|
|
324
326
|
if len(datas) > 0: # 保存最后收尾的
|
|
325
|
-
df = pandas.DataFrame(
|
|
327
|
+
df = pandas.DataFrame(
|
|
328
|
+
data=datas, columns=cols, dtype=object
|
|
329
|
+
) # 保存表头
|
|
326
330
|
# 解决科学计数法的问题
|
|
327
331
|
df = df.astype(str)
|
|
328
332
|
df = df.astype("string")
|
|
@@ -361,6 +365,7 @@ class ThinkDataQuery(BaseTga):
|
|
|
361
365
|
retry_count=2,
|
|
362
366
|
conn_timeout=30,
|
|
363
367
|
tga_data_timeout=600,
|
|
368
|
+
retry_wait=1,
|
|
364
369
|
):
|
|
365
370
|
"""
|
|
366
371
|
@des:从thinkdata的openapi获取数据----流式,为了节省内存---配合下面的getquerycsv
|
|
@@ -393,6 +398,7 @@ class ThinkDataQuery(BaseTga):
|
|
|
393
398
|
# else:
|
|
394
399
|
# break
|
|
395
400
|
if 1:
|
|
401
|
+
time.sleep(retry_wait)
|
|
396
402
|
continue
|
|
397
403
|
if gol_e is not None:
|
|
398
404
|
raise gol_e
|
|
@@ -409,6 +415,7 @@ class ThinkDataQuery(BaseTga):
|
|
|
409
415
|
print_size=100000,
|
|
410
416
|
conn_timeout=30,
|
|
411
417
|
tga_data_timeout=600,
|
|
418
|
+
retry_wait=1,
|
|
412
419
|
):
|
|
413
420
|
"""
|
|
414
421
|
@des: 接口装饰器--修改为get_data_csv,防止全面修改代码
|
|
@@ -423,6 +430,7 @@ class ThinkDataQuery(BaseTga):
|
|
|
423
430
|
retry_count,
|
|
424
431
|
conn_timeout,
|
|
425
432
|
tga_data_timeout,
|
|
433
|
+
retry_wait,
|
|
426
434
|
)
|
|
427
435
|
return result
|
|
428
436
|
|
|
File without changes
|
|
@@ -25,6 +25,9 @@ tfduck/s3/__init__.py
|
|
|
25
25
|
tfduck/s3/s3oper.py
|
|
26
26
|
tfduck/sagemaker/__init__.py
|
|
27
27
|
tfduck/sagemaker/saoper.py
|
|
28
|
+
tfduck/serverless_k8s/__init__.py
|
|
29
|
+
tfduck/serverless_k8s/k8s_manage.py
|
|
30
|
+
tfduck/serverless_k8s/k8s_task.py
|
|
28
31
|
tfduck/tga/__init__.py
|
|
29
32
|
tfduck/tga/base_tga.py
|
|
30
33
|
tfduck/tga/predict_sql_ltv.py
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__="0.18.5"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|