tfduck-bsd 0.18.5__tar.gz → 0.18.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tfduck-bsd might be problematic. Click here for more details.

Files changed (50) hide show
  1. {tfduck-bsd-0.18.5/tfduck_bsd.egg-info → tfduck-bsd-0.18.7}/PKG-INFO +1 -1
  2. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/setup.py +1 -1
  3. tfduck-bsd-0.18.7/tfduck/__init__.py +1 -0
  4. tfduck-bsd-0.18.7/tfduck/serverless_k8s/k8s_manage.py +228 -0
  5. tfduck-bsd-0.18.7/tfduck/serverless_k8s/k8s_task.py +343 -0
  6. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/tga.py +9 -1
  7. tfduck-bsd-0.18.7/tfduck/thinkdata/__init__.py +0 -0
  8. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7/tfduck_bsd.egg-info}/PKG-INFO +1 -1
  9. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck_bsd.egg-info/SOURCES.txt +3 -0
  10. tfduck-bsd-0.18.5/tfduck/__init__.py +0 -1
  11. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/LICENSE +0 -0
  12. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/README.md +0 -0
  13. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/bin/tfduck +0 -0
  14. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/setup.cfg +0 -0
  15. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/bdp_sdk_py/__init__.py +0 -0
  16. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/bdp_sdk_py/config/__init__.py +0 -0
  17. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/bdp_sdk_py/config/bdpmanager.py +0 -0
  18. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/bdp_sdk_py/config/table_config.py +0 -0
  19. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/bdp_sdk_py/example.py +0 -0
  20. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/bdp_sdk_py/opends/__init__.py +0 -0
  21. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/bdp_sdk_py/opends/opends.py +0 -0
  22. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/bdp_sdk_py/opends/sdk.py +0 -0
  23. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/common/__init__.py +0 -0
  24. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/common/defines.py +0 -0
  25. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/common/defines_clean.py +0 -0
  26. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/common/extendEncoder.py +0 -0
  27. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/main.py +0 -0
  28. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/oss/__init__.py +0 -0
  29. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/oss/oss.py +0 -0
  30. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/pyspark_k8s/__init__.py +0 -0
  31. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/pyspark_k8s/k8s_manage.py +0 -0
  32. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/pyspark_k8s/spark_manage.py +0 -0
  33. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/s3/__init__.py +0 -0
  34. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/s3/s3oper.py +0 -0
  35. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/sagemaker/__init__.py +0 -0
  36. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/sagemaker/saoper.py +0 -0
  37. {tfduck-bsd-0.18.5/tfduck/tga → tfduck-bsd-0.18.7/tfduck/serverless_k8s}/__init__.py +0 -0
  38. {tfduck-bsd-0.18.5/tfduck/thinkdata → tfduck-bsd-0.18.7/tfduck/tga}/__init__.py +0 -0
  39. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/base_tga.py +0 -0
  40. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/predict_sql_ltv.py +0 -0
  41. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/predict_sql_retain.py +0 -0
  42. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/predict_sql_yh.py +0 -0
  43. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/tga_test.py +0 -0
  44. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/train_sql_ltv.py +0 -0
  45. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/train_sql_retain.py +0 -0
  46. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/tga/train_sql_yh.py +0 -0
  47. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck/thinkdata/query.py +0 -0
  48. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck_bsd.egg-info/dependency_links.txt +0 -0
  49. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck_bsd.egg-info/requires.txt +0 -0
  50. {tfduck-bsd-0.18.5 → tfduck-bsd-0.18.7}/tfduck_bsd.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tfduck-bsd
3
- Version: 0.18.5
3
+ Version: 0.18.7
4
4
  Summary: A small example package
5
5
  Home-page: UNKNOWN
6
6
  Author: yuanxiao
@@ -8,7 +8,7 @@ with open("README.md", "r") as fh:
8
8
 
9
9
  setuptools.setup(
10
10
  name="tfduck-bsd",
11
- version="0.18.5",
11
+ version="0.18.7",
12
12
  author="yuanxiao",
13
13
  author_email="yuan6785@163.com",
14
14
  description="A small example package",
@@ -0,0 +1 @@
1
+ __version__="0.18.7"
@@ -0,0 +1,228 @@
1
+ """
2
+ author: yuanxiao
3
+ datetime: 20210423
4
+ python3.8
5
+ pyspark 提交阿里云ask集群管理器
6
+ pip install kubernetes==12.0.1 oss2==2.19.1
7
+
8
+ 如果报错【CA_KEY_TOO_SMALL】则执行(因为k8s的老的秘钥创建是1024位的):
9
+ 参考: https://www.cnblogs.com/dingnosakura/p/17815410.html
10
+ """
11
+
12
if 1:
    # Work around the CA_KEY_TOO_SMALL handshake error by monkey patching
    # urllib3.PoolManager (written for urllib3 2.x).
    #
    # SECURITY NOTE: this lowers the OpenSSL security level to 0 for EVERY
    # PoolManager created in this process without an explicit ``ssl_context``,
    # not only for the Kubernetes client. It is kept for compatibility with
    # clusters whose CA key is too small for the default security level.
    import functools

    import urllib3
    from urllib3.util.ssl_ import create_urllib3_context

    # Custom SSL context: urllib3's defaults with a relaxed cipher policy.
    ctx = create_urllib3_context()
    ctx.set_ciphers("DEFAULT@SECLEVEL=0")  # lower the security level
    ctx.load_default_certs()  # make sure the system certificates are loaded

    _current_poolmanager_init = urllib3.PoolManager.__init__

    if getattr(_current_poolmanager_init, "_tfduck_ssl_patched", False):
        # Already patched (e.g. this module was reloaded): reuse the existing
        # wrapper instead of wrapping it a second time.
        _patched_poolmanager_init = _current_poolmanager_init
        _original_poolmanager_init = _current_poolmanager_init.__wrapped__
    else:
        # Keep the original initializer so the patch can be rolled back.
        _original_poolmanager_init = _current_poolmanager_init

        @functools.wraps(_original_poolmanager_init)
        def _patched_poolmanager_init(self, *args, **kwargs):
            """Inject the relaxed SSL context unless the caller supplied one."""
            if "ssl_context" not in kwargs:
                kwargs["ssl_context"] = ctx
            _original_poolmanager_init(self, *args, **kwargs)

        # Marker checked above so the patch is applied exactly once.
        _patched_poolmanager_init._tfduck_ssl_patched = True
        urllib3.PoolManager.__init__ = _patched_poolmanager_init
34
+
35
+ import json
36
+
37
+ # from tfduck.common.defines import BMOBJ, Et
38
+ # from tfduck.oss.oss import AliyunOss
39
+ from kubernetes import client, config, utils
40
+ import uuid
41
+ import arrow
42
+ import os
43
+ import base64
44
+ import subprocess
45
+ from io import BytesIO, StringIO
46
+ import yaml
47
+
48
+
49
class K8sManage(object):
    """Small convenience wrapper around the ``kubernetes`` Python client.

    All pod operations act on the namespace given at construction time.
    """

    def __init__(
        self, ctx, k8s_conn_yaml=None, k8s_conn_json=None, namespace="default"
    ):
        """Store the connection settings and build the API clients.

        Args:
            ctx: Opaque context object; stored on the instance, not used here.
            k8s_conn_yaml: Path of a kubeconfig YAML file. Takes precedence
                over ``k8s_conn_json`` when both are given.
            k8s_conn_json: The kubeconfig already parsed into a dict, e.g.
                ``list(yaml.safe_load_all(open(path)))[0]``.
            namespace: Namespace every pod operation is applied to.
        """
        self.ctx = ctx
        self.k8s_conn_config = k8s_conn_yaml
        self.k8s_conn_json = k8s_conn_json
        self.namespace = namespace
        # One client for the core (pod) API, one generic client for
        # ``kubernetes.utils`` object creation.
        self.k8s_client = self.get_k8s_client(mode="v1")
        self.k8s_api_client = self.get_k8s_client(mode="api")

    def get_k8s_client(self, mode="v1"):
        """Load the kube configuration and return a client object.

        Examples: https://github.com/kubernetes-client/python/tree/master/examples
        API docs: https://github.com/kubernetes-client/python/blob/master/kubernetes/README.md

        Args:
            mode: ``"v1"`` for a ``CoreV1Api``, ``"api"`` for an ``ApiClient``.

        Raises:
            ValueError: If ``mode`` is not one of the supported values.
        """
        if self.k8s_conn_config:
            config.load_kube_config(self.k8s_conn_config)
        elif self.k8s_conn_json:
            config.load_kube_config_from_dict(self.k8s_conn_json)
        if mode == "v1":
            return client.CoreV1Api()
        if mode == "api":
            return client.ApiClient()
        raise ValueError(f"unsupported k8s client mode: {mode!r}")

    def get_all_pods(self):
        """Return name, phase and creation time of every pod in the namespace.

        Returns:
            list[dict]: One dict per pod with the keys ``name``, ``status``
            and ``create_time``.
        """
        # Ask the API server for this namespace only instead of listing the
        # whole cluster and filtering on the client side.
        ret = self.k8s_client.list_namespaced_pod(self.namespace, watch=False)
        return [
            {
                "name": pod.metadata.name,
                "status": pod.status.phase,
                "create_time": pod.metadata.creation_timestamp,
            }
            for pod in ret.items
            if pod.metadata.namespace == self.namespace
        ]

    def get_pod_info(self, pod_name):
        """Return the pod object for ``pod_name``.

        The phase is available as ``.status.phase`` (Pending, Running,
        Succeeded or Failed).
        """
        return self.k8s_client.read_namespaced_pod(
            name=pod_name, namespace=self.namespace
        )

    def get_pod_log(self, pod_name, lines_count=1000):
        """Return the last ``lines_count`` log lines of ``pod_name``."""
        # tail_lines counts from the END of the log.
        return self.k8s_client.read_namespaced_pod_log(
            pod_name, self.namespace, pretty=True, tail_lines=lines_count
        )

    def create_by_yaml(self, yaml_file):
        """Create the objects described by the YAML file at ``yaml_file``.

        Returns:
            Whatever ``kubernetes.utils.create_from_yaml`` returns.
        """
        return utils.create_from_yaml(
            k8s_client=self.k8s_api_client,
            yaml_file=yaml_file,
            namespace=self.namespace,
        )

    def create_by_yaml_str(self, yaml_str):
        """Create the objects described by a (multi-document) YAML string.

        Every document is attempted; failures are collected and raised
        together once all documents have been processed.

        Returns:
            list: The created objects, one entry per YAML document.

        Raises:
            kubernetes.utils.FailToCreateError: If any document failed.
        """
        failures = []
        k8s_objects = []
        for yml_document in yaml.safe_load_all(yaml_str):
            if yml_document is None:
                # Empty document (e.g. a trailing '---'): nothing to create.
                continue
            try:
                created = utils.create_from_dict(
                    k8s_client=self.k8s_api_client,
                    data=yml_document,
                    namespace=self.namespace,
                )
                k8s_objects.append(created)
            except utils.FailToCreateError as failure:
                failures.extend(failure.api_exceptions)
        if failures:
            raise utils.FailToCreateError(failures)
        return k8s_objects

    def create_by_json(self, json_obj):
        """Create the object(s) described by an already-parsed manifest dict.

        Raises:
            kubernetes.utils.FailToCreateError: If creation failed.
        """
        return utils.create_from_dict(
            k8s_client=self.k8s_api_client,
            data=json_obj,
            namespace=self.namespace,
        )

    def delete_pod(self, pod_name):
        """Delete ``pod_name`` and return the API response."""
        return self.k8s_client.delete_namespaced_pod(pod_name, self.namespace)
@@ -0,0 +1,343 @@
1
+ """
2
+ author: yuanxiao
3
+ datetime: 20210423
4
+ python3.8
5
+ ask serverless的提交任务
6
+ pip install kubernetes==12.0.1
7
+ """
8
+
9
+ import json
10
+ # from tfduck.common.defines import BMOBJ, Et
11
+
12
+ # from tfduck.oss.oss import AliyunOss
13
+ import arrow
14
+ import time
15
+ import os
16
+ import base64
17
+ import subprocess
18
+ import uuid
19
+
20
try:
    # Package-relative import: works when this module is imported as part of
    # the installed package.
    from .k8s_manage import K8sManage
except ImportError:
    # Fallback for running this file directly from its own directory.
    from k8s_manage import K8sManage

try:
    # Optional standalone helper; it is not listed among the package's
    # shipped files, so its absence must not break importing this module.
    from k8s_upload_oss import AliyunOss
except ImportError:
    pass
23
+
24
+
25
+ # class YxBMOBJ(object):
26
+ # def clog(self, ctx, *args):
27
+ # print(*args)
28
+ # pass
29
+
30
+ # def get_file_path(self, path):
31
+ # """
32
+ # @des: 文件的真实路径
33
+ # """
34
+ # # self.dj44_base_dir = "/Users/yuanxiao/workspace/djcelery44/djcelery44"
35
+ # # return os.path.join(self.dj44_base_dir, os.path.join("dags/sptasks/p_code", path))
36
+ # return os.path.abspath(path)
37
+
38
+
39
+ # BMOBJ = YxBMOBJ()
40
+
41
+ if 1:
42
+ from tfduck.common.defines import BMOBJ, Et
43
+ from tfduck.oss.oss import AliyunOss
44
+
45
+
46
class Et(Exception):
    """Task error carrying a numeric code and a human-readable message.

    NOTE: this definition shadows the ``Et`` name imported from
    ``tfduck.common.defines`` earlier in this module.
    """

    def __init__(self, code, msg):
        """Store ``code`` and ``msg`` and initialize the base exception."""
        super().__init__(code, msg)
        self.code = code
        self.msg = msg
50
+
51
+
52
class ServerlessTaskManage(object):
    """Run a code directory as a one-shot pod on a serverless k8s cluster.

    Workflow (see :meth:`sync`): upload the code directory to OSS, create a
    pod that mounts the OSS-backed PVC and runs ``main.py`` from it, poll the
    pod until it finishes, then clean up the uploaded code.
    """

    # Pod settings used when ``task_config`` is not supplied.
    DEFAULT_TASK_CONFIG = {
        # Pod name prefix (required).
        "task_name": "yxtestpodtask",
        # CPU request/limit (required).
        "cpu": "500m",
        # Memory request/limit (required).
        "memory": "500Mi",
        # Container image (required).
        "image_url": "registry-intl.cn-beijing.aliyuncs.com/talefun/python311:base",
        # Name of the OSS-backed PVC; it must live in the "duck-task"
        # namespace (required).
        "pvc_name": "tfduck-k8s-pvc",
        # PyPI index URL; an empty string means the official index.
        "pypi_mirror": "https://pypi.tuna.tsinghua.edu.cn/simple",
    }
    # Seconds slept between two pod status polls.
    POLL_INTERVAL = 10
    # Maximum number of polls while waiting for the pod to leave "Pending".
    MAX_START_POLLS = 30

    def __init__(
        self,
        ctx,
        code_path,
        max_run_time=7200,
        task_config=None,
        is_debug=False,
        project_name="playdayy-bj",
        pull_pod_success_log=True,
        pull_pod_fail_log=True,
        is_async=False,
        project_configs=None,
    ):
        """Validate the task settings and build the k8s client.

        Args:
            ctx: Opaque context object passed through to logging/OSS helpers.
            code_path: Local path of the code directory to run.
            max_run_time: Maximum seconds to wait for the pod to finish.
            task_config: Pod settings; see ``DEFAULT_TASK_CONFIG`` for the
                keys. A copy of the defaults is used when omitted.
            is_debug: When true, :meth:`sync` also deletes the pod afterwards.
            project_name: Key selecting one entry of ``project_configs``.
            pull_pod_success_log: Fetch and log the pod log on success.
            pull_pod_fail_log: Fetch and log the pod log on failure.
            is_async: When true, only submit the pod and do not wait for it.
            project_configs: Mapping of project name to a config dict that
                provides ``"oss_pv"`` and ``"k8s_serverless_conn"``.

        Raises:
            Et: If the task name is missing, empty or contains ``-``.
            KeyError: If ``project_name`` is not in ``project_configs``.
        """
        # Fresh objects per instance: defaults must never be shared.
        if task_config is None:
            task_config = dict(self.DEFAULT_TASK_CONFIG)
        if project_configs is None:
            project_configs = {}
        # --- validation ---
        task_sub_name = task_config.get("task_name", "default")
        if task_sub_name is None:
            raise Et(2, "task name must not be none")
        task_sub_name = task_sub_name.strip()
        if task_sub_name == "":
            raise Et(2, "task name must not be '' ")
        if task_sub_name.find("-") != -1:
            raise Et(2, "task name not clude - char")
        # --- project configuration ---
        self.project_name = project_name
        self.project_configs = project_configs
        self.project_conf = self.project_configs[self.project_name]
        # --- general state ---
        self.ctx = ctx
        self.is_debug = is_debug
        self.max_run_time = max_run_time
        self.code_path = code_path
        self.pull_pod_success_log = pull_pod_success_log
        self.pull_pod_fail_log = pull_pod_fail_log
        self.task_config = task_config
        self.pod_name = f"{task_sub_name}-{self.max_run_time}-{uuid.uuid4().hex[:8]}"
        self.oss_root_name = "oss_data"
        self.is_async = is_async
        # Set by upload_code(); None means nothing has been uploaded yet.
        self.real_code_path = None
        # --- OSS settings ---
        self.oss_config = self.project_conf["oss_pv"]
        # --- k8s client ---
        self.name_space = "duck-task"
        self.k8s_client = K8sManage(
            ctx,
            k8s_conn_json=self.project_conf["k8s_serverless_conn"],
            namespace=self.name_space,
        )

    def upload_code(self):
        """Upload the code directory to OSS and record its in-pod path.

        Sets ``self.real_code_path`` to the path under ``/<oss_root_name>``.

        Raises:
            Et: If ``code_path`` is empty.
        """
        if not self.code_path:
            raise Et(2, "code_path不能为空")
        ctx = self.ctx
        oss_config = self.oss_config
        now_str = arrow.utcnow().format("YYYY-MM-DD")
        uoss = AliyunOss(
            oss_config["bucket"],
            oss_config["access_key"],
            oss_config["secret_key"],
            oss_config["endpoint_public"],
        )
        # NOTE: the prefix must end with a slash.
        oss_file_path = f"code/{now_str}/{uuid.uuid4().hex}/"
        local_file_path = BMOBJ.get_file_path(self.code_path)
        if not local_file_path.startswith("/"):
            local_file_path = f"/{local_file_path}"
        # Per the original author's note: recursive upload, local files are
        # not deleted.
        uoss.upload_oss(
            ctx,
            local_file_path,
            oss_file_path,
            False,
            False,
            isrm=True,
            isdel=False,
        )
        self.real_code_path = os.path.join(f"/{self.oss_root_name}", oss_file_path)

    def clean_code(self):
        """Delete the uploaded code from OSS (no-op if nothing was uploaded)."""
        ctx = self.ctx
        real_code_path = getattr(self, "real_code_path", None)
        if not real_code_path:
            BMOBJ.clog(ctx, "clean code skipped: no uploaded code path")
            return
        # The OSS key is the in-pod path without the mount-point prefix.
        mount_prefix = f"/{self.oss_root_name}/"
        if real_code_path.startswith(mount_prefix):
            oss_code_path = real_code_path[len(mount_prefix):]
        else:
            oss_code_path = real_code_path
        if not oss_code_path:
            # Never issue a prefix delete with an empty prefix.
            BMOBJ.clog(ctx, "clean code skipped: empty oss prefix")
            return
        oss_config = self.oss_config
        uoss = AliyunOss(
            oss_config["bucket"],
            oss_config["access_key"],
            oss_config["secret_key"],
            oss_config["endpoint_public"],
        )
        BMOBJ.clog(ctx, "clean code path:", oss_code_path)
        uoss.delete_prefix_oss(ctx, oss_code_path, isrm=True)

    def get_submit_task_cmd(self):
        """Build the pod manifest (as a dict) that runs the uploaded code.

        The container changes into the uploaded code directory, installs
        ``requirements.txt`` and then runs ``main.py``.
        """
        pypi_mirror = self.task_config.get("pypi_mirror", "")
        pypi_mirror_host = (
            pypi_mirror.split("://")[1].split("/")[0] if pypi_mirror else ""
        )
        pypi_i = (
            f"-i {pypi_mirror} --trusted-host {pypi_mirror_host}" if pypi_mirror else ""
        )
        # Chained with && so the first failing step aborts the task.
        shell_cmd = " && ".join(
            [
                f"cd {self.real_code_path}",
                "python3 --version",
                f"pip install -U pip {pypi_i}",
                f"pip install arrow {pypi_i}",
                f"pip install -r requirements.txt {pypi_i}",
                "python main.py",
            ]
        )
        resources = {
            "memory": self.task_config["memory"],
            "cpu": self.task_config["cpu"],
        }
        volume_name = "tfduck-k8s-pv"
        return {
            "apiVersion": "v1",
            "kind": "Pod",
            "metadata": {"name": self.pod_name, "namespace": self.name_space},
            "spec": {
                "containers": [
                    {
                        "name": "python311-container",
                        "image": self.task_config["image_url"],
                        "command": ["/bin/bash", "-c"],
                        "args": [shell_cmd],
                        "resources": {
                            "requests": dict(resources),
                            "limits": dict(resources),
                        },
                        "volumeMounts": [
                            {
                                "name": volume_name,
                                "mountPath": f"/{self.oss_root_name}",
                            }
                        ],
                    }
                ],
                "volumes": [
                    {
                        "name": volume_name,
                        "persistentVolumeClaim": {
                            "claimName": self.task_config["pvc_name"]
                        },
                    }
                ],
                "restartPolicy": "Never",
            },
        }

    def _refresh_pod(self, pod_obj):
        """Return the latest pod object, or ``pod_obj`` if the lookup fails."""
        try:
            return self.k8s_client.get_pod_info(self.pod_name)
        except Exception as e:
            # Best effort: one failed status lookup must not abort the wait.
            BMOBJ.clog(self.ctx, f"get pod info fail, will retry: {e}")
            return pod_obj

    def submit_task(self):
        """Create the pod and, unless async, wait for it to finish.

        Completion is judged from the pod phase rather than from its log.

        Returns:
            bool: In async mode, True if the pod is Pending/Running right
            after creation. Otherwise True only if the pod reached
            "Succeeded" within ``max_run_time`` seconds.
        """
        ctx = self.ctx
        self.k8s_client.create_by_json(self.get_submit_task_cmd())
        pod_obj = self.k8s_client.get_pod_info(self.pod_name)
        if self.is_async:
            BMOBJ.clog(ctx, pod_obj.status.phase)
            if pod_obj.status.phase in ["Pending", "Running"]:
                BMOBJ.clog(ctx, "task success runing")
                return True
            BMOBJ.clog(ctx, "task fail")
            return False

        start = time.time()
        finished_phases = ("Succeeded", "Failed")
        # A pod may go from Pending to a finished phase between two polls,
        # so a finished pod also counts as "started".
        started_phases = ("Running",) + finished_phases
        # Stage 1: wait for the pod to start.
        polls = 0
        while (
            pod_obj.status.phase not in started_phases
            and polls < self.MAX_START_POLLS
        ):
            time.sleep(self.POLL_INTERVAL)
            pod_obj = self._refresh_pod(pod_obj)
            polls += 1
        if pod_obj.status.phase not in started_phases:
            BMOBJ.clog(
                ctx,
                f"task fail in {time.time() - start} seconds state: {pod_obj.status.phase}",
            )
            return False
        # Stage 2: wait until the pod finishes or the time budget is used up.
        wait_time = time.time() - start
        while (
            pod_obj.status.phase not in finished_phases
            and wait_time <= self.max_run_time
        ):
            time.sleep(self.POLL_INTERVAL)
            pod_obj = self._refresh_pod(pod_obj)
            wait_time = time.time() - start
            BMOBJ.clog(ctx, f"pod status: {pod_obj.status.phase}")

        if pod_obj.status.phase == "Succeeded":
            BMOBJ.clog(ctx, "task success complated")
            if self.pull_pod_success_log:
                success_log = self.k8s_client.get_pod_log(self.pod_name, 500)
                BMOBJ.clog(ctx, "pod task success log ----------:", success_log)
            return True
        if pod_obj.status.phase not in finished_phases:
            BMOBJ.clog(ctx, f"task timeout after {wait_time} seconds")
        BMOBJ.clog(ctx, "task fail-------------------error log:")
        if self.pull_pod_fail_log:
            error_log = self.k8s_client.get_pod_log(self.pod_name, 1000)
            BMOBJ.clog(ctx, "pod task fail log ----------:", error_log)
        return False

    def sync(self):
        """Upload the code, run the task and clean up (all in one).

        Returns:
            bool: The result of :meth:`submit_task`.
        """
        try:
            self.upload_code()
            result = self.submit_task()
        finally:
            # In async mode the pod may still need the code, so nothing is
            # cleaned up here.
            if not self.is_async:
                try:
                    self.clean_code()
                except Exception as e:
                    # Cleanup is best effort and must not mask the result.
                    BMOBJ.clog(self.ctx, f"clean code fail: {e}")
                # Pods are deleted here only when debugging; per the original
                # author's note, production relies on scheduled cleanup.
                if self.is_debug:
                    try:
                        self.k8s_client.delete_pod(self.pod_name)
                    except Exception as e:
                        BMOBJ.clog(self.ctx, f"delete pod fail: {e}")
        return result
@@ -196,6 +196,7 @@ class ThinkDataQuery(BaseTga):
196
196
  retry_count=2,
197
197
  conn_timeout=30,
198
198
  tga_data_timeout=600,
199
+ retry_wait=1,
199
200
  ):
200
201
  """
201
202
  @des:从thinkdata的openapi获取数据----流式,为了节省内存---配合下面的getquerycsv
@@ -228,6 +229,7 @@ class ThinkDataQuery(BaseTga):
228
229
  # else:
229
230
  # break
230
231
  if 1:
232
+ time.sleep(retry_wait)
231
233
  continue
232
234
  if gol_e is not None:
233
235
  raise gol_e
@@ -322,7 +324,9 @@ class ThinkDataQuery(BaseTga):
322
324
  BMOBJ.clog(ctx, i)
323
325
  BMOBJ.clog(ctx, f"total: {i}")
324
326
  if len(datas) > 0: # 保存最后收尾的
325
- df = pandas.DataFrame(data=datas, columns=cols, dtype=object) # 保存表头
327
+ df = pandas.DataFrame(
328
+ data=datas, columns=cols, dtype=object
329
+ ) # 保存表头
326
330
  # 解决科学计数法的问题
327
331
  df = df.astype(str)
328
332
  df = df.astype("string")
@@ -361,6 +365,7 @@ class ThinkDataQuery(BaseTga):
361
365
  retry_count=2,
362
366
  conn_timeout=30,
363
367
  tga_data_timeout=600,
368
+ retry_wait=1,
364
369
  ):
365
370
  """
366
371
  @des:从thinkdata的openapi获取数据----流式,为了节省内存---配合下面的getquerycsv
@@ -393,6 +398,7 @@ class ThinkDataQuery(BaseTga):
393
398
  # else:
394
399
  # break
395
400
  if 1:
401
+ time.sleep(retry_wait)
396
402
  continue
397
403
  if gol_e is not None:
398
404
  raise gol_e
@@ -409,6 +415,7 @@ class ThinkDataQuery(BaseTga):
409
415
  print_size=100000,
410
416
  conn_timeout=30,
411
417
  tga_data_timeout=600,
418
+ retry_wait=1,
412
419
  ):
413
420
  """
414
421
  @des: 接口装饰器--修改为get_data_csv,防止全面修改代码
@@ -423,6 +430,7 @@ class ThinkDataQuery(BaseTga):
423
430
  retry_count,
424
431
  conn_timeout,
425
432
  tga_data_timeout,
433
+ retry_wait,
426
434
  )
427
435
  return result
428
436
 
File without changes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tfduck-bsd
3
- Version: 0.18.5
3
+ Version: 0.18.7
4
4
  Summary: A small example package
5
5
  Home-page: UNKNOWN
6
6
  Author: yuanxiao
@@ -25,6 +25,9 @@ tfduck/s3/__init__.py
25
25
  tfduck/s3/s3oper.py
26
26
  tfduck/sagemaker/__init__.py
27
27
  tfduck/sagemaker/saoper.py
28
+ tfduck/serverless_k8s/__init__.py
29
+ tfduck/serverless_k8s/k8s_manage.py
30
+ tfduck/serverless_k8s/k8s_task.py
28
31
  tfduck/tga/__init__.py
29
32
  tfduck/tga/base_tga.py
30
33
  tfduck/tga/predict_sql_ltv.py
@@ -1 +0,0 @@
1
- __version__="0.18.5"
File without changes
File without changes
File without changes
File without changes
File without changes