tfduck-bsd 0.6.9__tar.gz → 0.19.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/PKG-INFO +2 -2
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/setup.py +14 -5
- tfduck_bsd-0.19.8/tfduck/__init__.py +1 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/common/defines.py +141 -19
- tfduck_bsd-0.19.8/tfduck/common/defines_clean.py +573 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/common/extendEncoder.py +2 -2
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/main.py +1 -1
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/oss/oss.py +52 -19
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/pyspark_k8s/spark_manage.py +69 -18
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/s3/s3oper.py +190 -70
- tfduck_bsd-0.19.8/tfduck/sagemaker/saoper.py +256 -0
- tfduck_bsd-0.19.8/tfduck/serverless_k8s/k8s_manage.py +231 -0
- tfduck_bsd-0.19.8/tfduck/serverless_k8s/k8s_task.py +351 -0
- tfduck_bsd-0.19.8/tfduck/tga/__init__.py +0 -0
- tfduck_bsd-0.19.8/tfduck/tga/base_tga.py +55 -0
- tfduck_bsd-0.19.8/tfduck/tga/predict_sql_ltv.py +92 -0
- tfduck_bsd-0.19.8/tfduck/tga/predict_sql_retain.py +486 -0
- tfduck_bsd-0.19.8/tfduck/tga/predict_sql_yh.py +92 -0
- tfduck_bsd-0.19.8/tfduck/tga/tga.py +663 -0
- tfduck_bsd-0.19.8/tfduck/tga/tga_test.py +91 -0
- tfduck_bsd-0.19.8/tfduck/tga/tga_trino.py +586 -0
- tfduck_bsd-0.19.8/tfduck/tga/train_sql_ltv.py +613 -0
- tfduck_bsd-0.19.8/tfduck/tga/train_sql_retain.py +578 -0
- tfduck_bsd-0.19.8/tfduck/tga/train_sql_yh.py +575 -0
- tfduck_bsd-0.19.8/tfduck/thinkdata/__init__.py +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck_bsd.egg-info/PKG-INFO +1 -1
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck_bsd.egg-info/SOURCES.txt +15 -0
- tfduck_bsd-0.19.8/tfduck_bsd.egg-info/requires.txt +9 -0
- tfduck-bsd-0.6.9/tfduck/__init__.py +0 -1
- tfduck-bsd-0.6.9/tfduck/tga/tga.py +0 -129
- tfduck-bsd-0.6.9/tfduck_bsd.egg-info/requires.txt +0 -6
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/LICENSE +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/README.md +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/bin/tfduck +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/setup.cfg +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/bdp_sdk_py/__init__.py +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/bdp_sdk_py/config/__init__.py +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/bdp_sdk_py/config/bdpmanager.py +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/bdp_sdk_py/config/table_config.py +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/bdp_sdk_py/example.py +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/bdp_sdk_py/opends/__init__.py +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/bdp_sdk_py/opends/opends.py +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/bdp_sdk_py/opends/sdk.py +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/common/__init__.py +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/oss/__init__.py +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/pyspark_k8s/__init__.py +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/pyspark_k8s/k8s_manage.py +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/s3/__init__.py +0 -0
- {tfduck-bsd-0.6.9/tfduck/tga → tfduck_bsd-0.19.8/tfduck/sagemaker}/__init__.py +0 -0
- {tfduck-bsd-0.6.9/tfduck/thinkdata → tfduck_bsd-0.19.8/tfduck/serverless_k8s}/__init__.py +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/thinkdata/query.py +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck_bsd.egg-info/dependency_links.txt +0 -0
- {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck_bsd.egg-info/top_level.txt +0 -0
|
@@ -1,11 +1,14 @@
|
|
|
1
|
+
"""
|
|
2
|
+
未加密的tfduck/setup.py的代码
|
|
3
|
+
"""
|
|
1
4
|
import setuptools
|
|
2
5
|
|
|
3
6
|
with open("README.md", "r") as fh:
|
|
4
7
|
long_description = fh.read()
|
|
5
8
|
|
|
6
9
|
setuptools.setup(
|
|
7
|
-
name="
|
|
8
|
-
version="0.
|
|
10
|
+
name="tfduck_bsd",
|
|
11
|
+
version="0.19.8",
|
|
9
12
|
author="yuanxiao",
|
|
10
13
|
author_email="yuan6785@163.com",
|
|
11
14
|
description="A small example package",
|
|
@@ -15,12 +18,18 @@ setuptools.setup(
|
|
|
15
18
|
packages=setuptools.find_packages(),
|
|
16
19
|
# 需要安装的依赖
|
|
17
20
|
install_requires=[
|
|
21
|
+
'arrow>=0.15.5',
|
|
18
22
|
'requests>=2.20.0',
|
|
19
23
|
'django==2.2.12',
|
|
20
|
-
'oss2==2.
|
|
21
|
-
'ThinkingDataSdk==1.
|
|
24
|
+
'oss2==2.15.0', # 原来是2.5.0
|
|
25
|
+
'ThinkingDataSdk==1.8.0', #1.1.14, 1.6.2 1.8.0支持字典和数组
|
|
22
26
|
'kubernetes==12.0.1',
|
|
23
|
-
'
|
|
27
|
+
'sagemaker==2.183.0', # 2.24.1# modify by yx 2023-02-27, 2023-09-18 到 2.183.0 [最后支持3.7的版本], 上一个版本2.135.0
|
|
28
|
+
'boto3==1.28.49', # 1.24.27 1.18.36 # modify by yx 2023-02-27, 2023-09-18 到 1.28.49, 上一个版本1.26.79
|
|
29
|
+
'trino==0.327.0'
|
|
30
|
+
# 下面的包不能放到这里安装,依赖太多,需要尽量简单
|
|
31
|
+
# 'importlib_metadata==1.6.1',
|
|
32
|
+
# 'duckdb==0.3.4'
|
|
24
33
|
],
|
|
25
34
|
python_requires=">=3.5",
|
|
26
35
|
classifiers=[
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__="0.19.8"
|
|
@@ -13,10 +13,11 @@ import sys
|
|
|
13
13
|
import shutil
|
|
14
14
|
import time
|
|
15
15
|
import traceback
|
|
16
|
+
import random
|
|
16
17
|
import requests
|
|
17
18
|
import os
|
|
18
19
|
import uuid
|
|
19
|
-
|
|
20
|
+
import arrow
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class Et(Exception):
|
|
@@ -57,6 +58,14 @@ class BaseMethod(object):
|
|
|
57
58
|
self.logger_django = logging.getLogger('django')
|
|
58
59
|
self.logger_dadian = logging.getLogger('dadian')
|
|
59
60
|
self.http_api = Dj44HttpApi()
|
|
61
|
+
|
|
62
|
+
def get_unique_id(self):
|
|
63
|
+
unique_id = (uuid.uuid5(uuid.NAMESPACE_DNS, str(uuid.uuid1()) + str(random.random()))).hex
|
|
64
|
+
return unique_id
|
|
65
|
+
|
|
66
|
+
def get_unique_id2(self):
|
|
67
|
+
unique_id = (uuid.uuid5(uuid.NAMESPACE_DNS, str(uuid.uuid1()) + str(random.random()))).hex
|
|
68
|
+
return unique_id
|
|
60
69
|
|
|
61
70
|
def get_current_env(self):
|
|
62
71
|
current_env = 'server'
|
|
@@ -194,7 +203,7 @@ class BaseMethod(object):
|
|
|
194
203
|
self.log_error("clog error ----- :")
|
|
195
204
|
self.logerr(e)
|
|
196
205
|
# 防止过度写入日志
|
|
197
|
-
time.sleep(0.
|
|
206
|
+
time.sleep(0.2)
|
|
198
207
|
|
|
199
208
|
def get_record_now(self, ctx={}, tz="UTC"):
|
|
200
209
|
"""
|
|
@@ -331,7 +340,7 @@ class Dj44HttpApi(object):
|
|
|
331
340
|
"""
|
|
332
341
|
@des: 外部api
|
|
333
342
|
"""
|
|
334
|
-
|
|
343
|
+
|
|
335
344
|
def get_current_env(self):
|
|
336
345
|
current_env = 'server'
|
|
337
346
|
try:
|
|
@@ -340,18 +349,7 @@ class Dj44HttpApi(object):
|
|
|
340
349
|
current_env = "local"
|
|
341
350
|
return current_env
|
|
342
351
|
|
|
343
|
-
def
|
|
344
|
-
"""
|
|
345
|
-
@des: http外部接口运行非定时的任务
|
|
346
|
-
一般用于多个工程的上游依赖,A->B
|
|
347
|
-
当A工程执行完毕后, 执行B工程的所有任务
|
|
348
|
-
@param user_token[必填]: tfduck的用户token
|
|
349
|
-
task_names[必填]: 运行的任务的全名的列表, 比如 ["同步任务_3",...]
|
|
350
|
-
task_param[选填]: 任务的参数
|
|
351
|
-
@return:
|
|
352
|
-
成功---{"s":1, 'task_names':[]}
|
|
353
|
-
失败---{"s: 非1, 'msg':'失败原因'}
|
|
354
|
-
"""
|
|
352
|
+
def get_host_name(self):
|
|
355
353
|
if self.get_current_env() == "local":
|
|
356
354
|
return {'s': 13, 'msg': 'local'}
|
|
357
355
|
else:
|
|
@@ -361,14 +359,138 @@ class Dj44HttpApi(object):
|
|
|
361
359
|
host_name = obj.contents['value']
|
|
362
360
|
except:
|
|
363
361
|
return {'s': 13, 'msg': 'host_name error'}
|
|
362
|
+
return {'s': 1, 'v': host_name}
|
|
363
|
+
|
|
364
|
+
# start----tfduck的外部接口调用
|
|
365
|
+
def run_sptasks_by_taskname(self, user_token, task_names):
|
|
366
|
+
"""
|
|
367
|
+
@des: http外部接口运行非定时的任务
|
|
368
|
+
一般用于多个工程的上游依赖,A->B
|
|
369
|
+
当A工程执行完毕后, 执行B工程的所有任务
|
|
370
|
+
@param user_token[必填]: tfduck的用户token
|
|
371
|
+
task_names[必填]: 运行的sptask任务(不是任务工程,也不是任务记录)的全名的列表, 比如 ["同步任务_3",...]
|
|
372
|
+
@return:
|
|
373
|
+
成功---{"s":1, 'value':[任务名称]}
|
|
374
|
+
失败---{"s": 非1, 'msg':'失败原因'}
|
|
375
|
+
@example:
|
|
376
|
+
######
|
|
377
|
+
from tfduck.common.defines import BMOBJ
|
|
378
|
+
BMOBJ.http_api.run_sptasks_by_taskname('xxxxx', ['helloworld_end_tf_2_0'])
|
|
379
|
+
######
|
|
380
|
+
"""
|
|
381
|
+
hresult = self.get_host_name()
|
|
382
|
+
if hresult['s'] == 1:
|
|
383
|
+
host_name = hresult['v']
|
|
384
|
+
else:
|
|
385
|
+
return hresult
|
|
386
|
+
#
|
|
387
|
+
url = os.path.join(host_name, 'run_sptasks_by_taskname')
|
|
388
|
+
data = {
|
|
389
|
+
'user_token': user_token,
|
|
390
|
+
'task_names': json.dumps(task_names),
|
|
391
|
+
}
|
|
392
|
+
res = requests.post(url, data=data, timeout=(5, 30))
|
|
393
|
+
result = res.json()
|
|
394
|
+
return result
|
|
395
|
+
|
|
396
|
+
def run_sptasks_by_projectname(self, user_token, project_names):
|
|
397
|
+
"""
|
|
398
|
+
@des: http外部接口运行非定时的任务
|
|
399
|
+
一般用于多个工程的上游依赖,A->B
|
|
400
|
+
当A工程执行完毕后, 执行B工程的所有任务
|
|
401
|
+
@param user_token [必填]: tfduck的用户token
|
|
402
|
+
project_names[必填]: 运行的sptask任务工程的名称(不是任务,也不是任务记录)的全名的列表, 比如 ["同步任务工程名",...]
|
|
403
|
+
@return:
|
|
404
|
+
成功---{"s":1, 'value':[任务名称]}
|
|
405
|
+
失败---{"s": 非1, 'msg':'失败原因'}
|
|
406
|
+
@example:
|
|
407
|
+
######
|
|
408
|
+
from tfduck.common.defines import BMOBJ
|
|
409
|
+
BMOBJ.http_api.run_sptasks_by_projectname('xxxxx', ['helloworld_end_tf_2_0'])
|
|
410
|
+
######
|
|
411
|
+
"""
|
|
412
|
+
hresult = self.get_host_name()
|
|
413
|
+
if hresult['s'] == 1:
|
|
414
|
+
host_name = hresult['v']
|
|
415
|
+
else:
|
|
416
|
+
return hresult
|
|
417
|
+
#
|
|
418
|
+
url = os.path.join(host_name, 'run_sptasks_by_projectname')
|
|
419
|
+
data = {
|
|
420
|
+
'user_token': user_token,
|
|
421
|
+
'project_names': json.dumps(project_names)
|
|
422
|
+
}
|
|
423
|
+
res = requests.post(url, data=data, timeout=(5, 30))
|
|
424
|
+
result = res.json()
|
|
425
|
+
return result
|
|
426
|
+
|
|
427
|
+
def get_sptasks_info_by_taskname(self, user_token, task_names, start_time, end_time):
|
|
428
|
+
"""
|
|
429
|
+
@des: http外部接口查询任务的信息,根据任务名称查询
|
|
430
|
+
一般用于多个工程的上游依赖,A->B
|
|
431
|
+
当A工程执行完毕后, 执行B工程的所有任务
|
|
432
|
+
@param user_token[必填]: tfduck的用户token
|
|
433
|
+
task_names[必填]: 运行的sptask任务(不是任务工程,也不是任务记录)的全名的列表, 比如 ["同步任务_3",...]
|
|
434
|
+
start_time[选填]: 任务的create_time的查询时间范围的开始时间,格式YYYY-MM-DD HH:mm:ss 例如 2022-02-15 14:23:44
|
|
435
|
+
end_time [选填]: 任务的create_time的查询时间范围的结束时间,格式YYYY-MM-DD HH:mm:ss 例如 2022-02-15 18:23:44
|
|
436
|
+
@return:
|
|
437
|
+
成功---{"s":1, 'value':[{'task__name': '任务名称', 'task__id': 109, 'id': 78, 'state': 3, 'revoke_state': 1},...]}
|
|
438
|
+
失败---{"s": 非1, 'msg':'失败原因'}
|
|
439
|
+
@example:
|
|
440
|
+
######
|
|
441
|
+
from tfduck.common.defines import BMOBJ
|
|
442
|
+
BMOBJ.http_api.get_sptasks_info_by_taskname('xxxxx', ['helloworld_end_tf_2_0'], '2022-02-15 14:23:44', '2022-02-15 18:23:44')
|
|
443
|
+
######
|
|
444
|
+
"""
|
|
445
|
+
hresult = self.get_host_name()
|
|
446
|
+
if hresult['s'] == 1:
|
|
447
|
+
host_name = hresult['v']
|
|
448
|
+
else:
|
|
449
|
+
return hresult
|
|
450
|
+
#
|
|
451
|
+
url = os.path.join(host_name, 'get_sptasks_info_by_taskname')
|
|
452
|
+
data = {
|
|
453
|
+
'user_token': user_token,
|
|
454
|
+
'task_names': json.dumps(task_names),
|
|
455
|
+
'start_time': start_time,
|
|
456
|
+
'end_time': end_time,
|
|
457
|
+
}
|
|
458
|
+
res = requests.post(url, data=data, timeout=(5, 30))
|
|
459
|
+
result = res.json()
|
|
460
|
+
return result
|
|
461
|
+
|
|
462
|
+
def get_sptasks_info_by_projectname(self, user_token, project_names, start_time, end_time):
|
|
463
|
+
"""
|
|
464
|
+
@des: http外部接口查询任务的信息,根据任务工程名称查询
|
|
465
|
+
一般用于多个工程的上游依赖,A->B
|
|
466
|
+
当A工程执行完毕后, 执行B工程的所有任务
|
|
467
|
+
@param user_token[必填]: tfduck的用户token
|
|
468
|
+
project_names[必填]: 运行的sptask任务工程的名称(不是任务,也不是任务记录)的全名的列表, 比如 ["同步任务工程名",...]
|
|
469
|
+
start_time[选填]: 任务的create_time的查询时间范围的开始时间,格式YYYY-MM-DD HH:mm:ss 例如 2022-02-15 14:23:44
|
|
470
|
+
end_time [选填]: 任务的create_time的查询时间范围的结束时间,格式YYYY-MM-DD HH:mm:ss 例如 2022-02-15 18:23:44
|
|
471
|
+
@return:
|
|
472
|
+
成功---{"s":1, 'value':[{'task__name': '任务名称', 'task__id': 109, 'id': 78, 'state': 3, 'revoke_state': 1},...]}
|
|
473
|
+
失败---{"s": 非1, 'msg':'失败原因'}
|
|
474
|
+
@example:
|
|
475
|
+
######
|
|
476
|
+
from tfduck.common.defines import BMOBJ
|
|
477
|
+
BMOBJ.http_api.get_sptasks_info_by_projectname('xxxxx', ['helloworld_end_tf_2_0'], '2022-02-15 14:23:44', '2022-02-15 18:23:44')
|
|
478
|
+
######
|
|
479
|
+
"""
|
|
480
|
+
hresult = self.get_host_name()
|
|
481
|
+
if hresult['s'] == 1:
|
|
482
|
+
host_name = hresult['v']
|
|
483
|
+
else:
|
|
484
|
+
return hresult
|
|
364
485
|
#
|
|
365
|
-
url =
|
|
486
|
+
url = os.path.join(host_name, 'get_sptasks_info_by_projectname')
|
|
366
487
|
data = {
|
|
367
488
|
'user_token': user_token,
|
|
368
|
-
'
|
|
369
|
-
'
|
|
489
|
+
'project_names': json.dumps(project_names),
|
|
490
|
+
'start_time': start_time,
|
|
491
|
+
'end_time': end_time,
|
|
370
492
|
}
|
|
371
|
-
res = requests.post(url, data=data, timeout=(5,
|
|
493
|
+
res = requests.post(url, data=data, timeout=(5, 30))
|
|
372
494
|
result = res.json()
|
|
373
495
|
return result
|
|
374
496
|
|