tfduck-bsd 0.6.9__tar.gz → 0.19.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/PKG-INFO +2 -2
  2. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/setup.py +14 -5
  3. tfduck_bsd-0.19.8/tfduck/__init__.py +1 -0
  4. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/common/defines.py +141 -19
  5. tfduck_bsd-0.19.8/tfduck/common/defines_clean.py +573 -0
  6. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/common/extendEncoder.py +2 -2
  7. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/main.py +1 -1
  8. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/oss/oss.py +52 -19
  9. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/pyspark_k8s/spark_manage.py +69 -18
  10. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/s3/s3oper.py +190 -70
  11. tfduck_bsd-0.19.8/tfduck/sagemaker/saoper.py +256 -0
  12. tfduck_bsd-0.19.8/tfduck/serverless_k8s/k8s_manage.py +231 -0
  13. tfduck_bsd-0.19.8/tfduck/serverless_k8s/k8s_task.py +351 -0
  14. tfduck_bsd-0.19.8/tfduck/tga/__init__.py +0 -0
  15. tfduck_bsd-0.19.8/tfduck/tga/base_tga.py +55 -0
  16. tfduck_bsd-0.19.8/tfduck/tga/predict_sql_ltv.py +92 -0
  17. tfduck_bsd-0.19.8/tfduck/tga/predict_sql_retain.py +486 -0
  18. tfduck_bsd-0.19.8/tfduck/tga/predict_sql_yh.py +92 -0
  19. tfduck_bsd-0.19.8/tfduck/tga/tga.py +663 -0
  20. tfduck_bsd-0.19.8/tfduck/tga/tga_test.py +91 -0
  21. tfduck_bsd-0.19.8/tfduck/tga/tga_trino.py +586 -0
  22. tfduck_bsd-0.19.8/tfduck/tga/train_sql_ltv.py +613 -0
  23. tfduck_bsd-0.19.8/tfduck/tga/train_sql_retain.py +578 -0
  24. tfduck_bsd-0.19.8/tfduck/tga/train_sql_yh.py +575 -0
  25. tfduck_bsd-0.19.8/tfduck/thinkdata/__init__.py +0 -0
  26. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck_bsd.egg-info/PKG-INFO +1 -1
  27. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck_bsd.egg-info/SOURCES.txt +15 -0
  28. tfduck_bsd-0.19.8/tfduck_bsd.egg-info/requires.txt +9 -0
  29. tfduck-bsd-0.6.9/tfduck/__init__.py +0 -1
  30. tfduck-bsd-0.6.9/tfduck/tga/tga.py +0 -129
  31. tfduck-bsd-0.6.9/tfduck_bsd.egg-info/requires.txt +0 -6
  32. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/LICENSE +0 -0
  33. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/README.md +0 -0
  34. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/bin/tfduck +0 -0
  35. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/setup.cfg +0 -0
  36. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/bdp_sdk_py/__init__.py +0 -0
  37. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/bdp_sdk_py/config/__init__.py +0 -0
  38. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/bdp_sdk_py/config/bdpmanager.py +0 -0
  39. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/bdp_sdk_py/config/table_config.py +0 -0
  40. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/bdp_sdk_py/example.py +0 -0
  41. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/bdp_sdk_py/opends/__init__.py +0 -0
  42. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/bdp_sdk_py/opends/opends.py +0 -0
  43. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/bdp_sdk_py/opends/sdk.py +0 -0
  44. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/common/__init__.py +0 -0
  45. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/oss/__init__.py +0 -0
  46. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/pyspark_k8s/__init__.py +0 -0
  47. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/pyspark_k8s/k8s_manage.py +0 -0
  48. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/s3/__init__.py +0 -0
  49. {tfduck-bsd-0.6.9/tfduck/tga → tfduck_bsd-0.19.8/tfduck/sagemaker}/__init__.py +0 -0
  50. {tfduck-bsd-0.6.9/tfduck/thinkdata → tfduck_bsd-0.19.8/tfduck/serverless_k8s}/__init__.py +0 -0
  51. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck/thinkdata/query.py +0 -0
  52. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck_bsd.egg-info/dependency_links.txt +0 -0
  53. {tfduck-bsd-0.6.9 → tfduck_bsd-0.19.8}/tfduck_bsd.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
- Name: tfduck-bsd
3
- Version: 0.6.9
2
+ Name: tfduck_bsd
3
+ Version: 0.19.8
4
4
  Summary: A small example package
5
5
  Home-page: UNKNOWN
6
6
  Author: yuanxiao
@@ -1,11 +1,14 @@
1
+ """
2
+ 未加密的tfduck/setup.py的代码
3
+ """
1
4
  import setuptools
2
5
 
3
6
  with open("README.md", "r") as fh:
4
7
  long_description = fh.read()
5
8
 
6
9
  setuptools.setup(
7
- name="tfduck-bsd",
8
- version="0.6.9",
10
+ name="tfduck_bsd",
11
+ version="0.19.8",
9
12
  author="yuanxiao",
10
13
  author_email="yuan6785@163.com",
11
14
  description="A small example package",
@@ -15,12 +18,18 @@ setuptools.setup(
15
18
  packages=setuptools.find_packages(),
16
19
  # 需要安装的依赖
17
20
  install_requires=[
21
+ 'arrow>=0.15.5',
18
22
  'requests>=2.20.0',
19
23
  'django==2.2.12',
20
- 'oss2==2.5.0',
21
- 'ThinkingDataSdk==1.6.2', #1.1.14, 1.6.2
24
+ 'oss2==2.15.0', # 原来是2.5.0
25
+ 'ThinkingDataSdk==1.8.0', #1.1.14, 1.6.2 1.8.0支持字典和数组
22
26
  'kubernetes==12.0.1',
23
- 'boto3==1.18.36'
27
+ 'sagemaker==2.183.0', # 2.24.1# modify by yx 2023-02-27, 2023-09-18 到 2.183.0 [最后支持3.7的版本], 上一个版本2.135.0
28
+ 'boto3==1.28.49', # 1.24.27 1.18.36 # modify by yx 2023-02-27, 2023-09-18 到 1.28.49, 上一个版本1.26.79
29
+ 'trino==0.327.0'
30
+ # 下面的包不能放到这里安装,依赖太多,需要尽量简单
31
+ # 'importlib_metadata==1.6.1',
32
+ # 'duckdb==0.3.4'
24
33
  ],
25
34
  python_requires=">=3.5",
26
35
  classifiers=[
@@ -0,0 +1 @@
1
+ __version__="0.19.8"
@@ -13,10 +13,11 @@ import sys
13
13
  import shutil
14
14
  import time
15
15
  import traceback
16
+ import random
16
17
  import requests
17
18
  import os
18
19
  import uuid
19
- requests.post
20
+ import arrow
20
21
 
21
22
 
22
23
  class Et(Exception):
@@ -57,6 +58,14 @@ class BaseMethod(object):
57
58
  self.logger_django = logging.getLogger('django')
58
59
  self.logger_dadian = logging.getLogger('dadian')
59
60
  self.http_api = Dj44HttpApi()
61
+
62
+ def get_unique_id(self):
63
+ unique_id = (uuid.uuid5(uuid.NAMESPACE_DNS, str(uuid.uuid1()) + str(random.random()))).hex
64
+ return unique_id
65
+
66
+ def get_unique_id2(self):
67
+ unique_id = (uuid.uuid5(uuid.NAMESPACE_DNS, str(uuid.uuid1()) + str(random.random()))).hex
68
+ return unique_id
60
69
 
61
70
  def get_current_env(self):
62
71
  current_env = 'server'
@@ -194,7 +203,7 @@ class BaseMethod(object):
194
203
  self.log_error("clog error ----- :")
195
204
  self.logerr(e)
196
205
  # 防止过度写入日志
197
- time.sleep(0.5)
206
+ time.sleep(0.2)
198
207
 
199
208
  def get_record_now(self, ctx={}, tz="UTC"):
200
209
  """
@@ -331,7 +340,7 @@ class Dj44HttpApi(object):
331
340
  """
332
341
  @des: 外部api
333
342
  """
334
- # start----tfduck的外部接口调用
343
+
335
344
  def get_current_env(self):
336
345
  current_env = 'server'
337
346
  try:
@@ -340,18 +349,7 @@ class Dj44HttpApi(object):
340
349
  current_env = "local"
341
350
  return current_env
342
351
 
343
- def run_sptasks(self, user_token, task_names, task_param):
344
- """
345
- @des: http外部接口运行非定时的任务
346
- 一般用于多个工程的上游依赖,A->B
347
- 当A工程执行完毕后, 执行B工程的所有任务
348
- @param user_token[必填]: tfduck的用户token
349
- task_names[必填]: 运行的任务的全名的列表, 比如 ["同步任务_3",...]
350
- task_param[选填]: 任务的参数
351
- @return:
352
- 成功---{"s":1, 'task_names':[]}
353
- 失败---{"s: 非1, 'msg':'失败原因'}
354
- """
352
+ def get_host_name(self):
355
353
  if self.get_current_env() == "local":
356
354
  return {'s': 13, 'msg': 'local'}
357
355
  else:
@@ -361,14 +359,138 @@ class Dj44HttpApi(object):
361
359
  host_name = obj.contents['value']
362
360
  except:
363
361
  return {'s': 13, 'msg': 'host_name error'}
362
+ return {'s': 1, 'v': host_name}
363
+
364
+ # start----tfduck的外部接口调用
365
+ def run_sptasks_by_taskname(self, user_token, task_names):
366
+ """
367
+ @des: http外部接口运行非定时的任务
368
+ 一般用于多个工程的上游依赖,A->B
369
+ 当A工程执行完毕后, 执行B工程的所有任务
370
+ @param user_token[必填]: tfduck的用户token
371
+ task_names[必填]: 运行的sptask任务(不是任务工程,也不是任务记录)的全名的列表, 比如 ["同步任务_3",...]
372
+ @return:
373
+ 成功---{"s":1, 'value':[任务名称]}
374
+ 失败---{"s: 非1, 'msg':'失败原因'}
375
+ @example:
376
+ ######
377
+ from tfduck.common.defines import BMOBJ
378
+ BMOBJ.http_api.run_sptasks_by_taskname('xxxxx', ['helloworld_end_tf_2_0'])
379
+ ######
380
+ """
381
+ hresult = self.get_host_name()
382
+ if hresult['s'] == 1:
383
+ host_name = hresult['v']
384
+ else:
385
+ return hresult
386
+ #
387
+ url = os.path.join(host_name, 'run_sptasks_by_taskname')
388
+ data = {
389
+ 'user_token': user_token,
390
+ 'task_names': json.dumps(task_names),
391
+ }
392
+ res = requests.post(url, data=data, timeout=(5, 30))
393
+ result = res.json()
394
+ return result
395
+
396
+ def run_sptasks_by_projectname(self, user_token, project_names):
397
+ """
398
+ @des: http外部接口运行非定时的任务
399
+ 一般用于多个工程的上游依赖,A->B
400
+ 当A工程执行完毕后, 执行B工程的所有任务
401
+ @param user_token [必填]: tfduck的用户token
402
+ project_names[必填]: 运行的sptask任务工程的名称(不是任务,也不是任务记录)的全名的列表, 比如 ["同步任务工程名",...]
403
+ @return:
404
+ 成功---{"s":1, 'value':[任务名称]}
405
+ 失败---{"s: 非1, 'msg':'失败原因'}
406
+ @example:
407
+ ######
408
+ from tfduck.common.defines import BMOBJ
409
+ BMOBJ.http_api.run_sptasks_by_projectname('xxxxx', ['helloworld_end_tf_2_0'])
410
+ ######
411
+ """
412
+ hresult = self.get_host_name()
413
+ if hresult['s'] == 1:
414
+ host_name = hresult['v']
415
+ else:
416
+ return hresult
417
+ #
418
+ url = os.path.join(host_name, 'run_sptasks_by_projectname')
419
+ data = {
420
+ 'user_token': user_token,
421
+ 'project_names': json.dumps(project_names)
422
+ }
423
+ res = requests.post(url, data=data, timeout=(5, 30))
424
+ result = res.json()
425
+ return result
426
+
427
+ def get_sptasks_info_by_taskname(self, user_token, task_names, start_time, end_time):
428
+ """
429
+ @des: http外部接口查询任务的信息,根据任务名称查询
430
+ 一般用于多个工程的上游依赖,A->B
431
+ 当A工程执行完毕后, 执行B工程的所有任务
432
+ @param user_token[必填]: tfduck的用户token
433
+ task_names[必填]: 运行的sptask任务(不是任务工程,也不是任务记录)的全名的列表, 比如 ["同步任务_3",...]
434
+ start_time[选填]: 任务的create_time的查询时间范围的开始时间,格式YYYY-MM-DD HH:mm:ss 例如 2022-02-15 14:23:44
435
+ end_time [选填]: 任务的create_time的查询时间范围的结束时间,格式YYYY-MM-DD HH:mm:ss 例如 2022-02-15 18:23:44
436
+ @return:
437
+ 成功---{"s":1, 'value':[{'task__name': '任务名称', 'task__id': 109, 'id': 78, 'state': 3, 'revoke_state': 1},...]}
438
+ 失败---{"s: 非1, 'msg':'失败原因'}
439
+ @example:
440
+ ######
441
+ from tfduck.common.defines import BMOBJ
442
+ BMOBJ.http_api.get_sptasks_info_by_taskname('xxxxx', ['helloworld_end_tf_2_0'], '2022-02-15 14:23:44', '2022-02-15 18:23:44')
443
+ ######
444
+ """
445
+ hresult = self.get_host_name()
446
+ if hresult['s'] == 1:
447
+ host_name = hresult['v']
448
+ else:
449
+ return hresult
450
+ #
451
+ url = os.path.join(host_name, 'get_sptasks_info_by_taskname')
452
+ data = {
453
+ 'user_token': user_token,
454
+ 'task_names': json.dumps(task_names),
455
+ 'start_time': start_time,
456
+ 'end_time': end_time,
457
+ }
458
+ res = requests.post(url, data=data, timeout=(5, 30))
459
+ result = res.json()
460
+ return result
461
+
462
+ def get_sptasks_info_by_projectname(self, user_token, project_names, start_time, end_time):
463
+ """
464
+ @des: http外部接口查询任务的信息,根据任务工程名称查询
465
+ 一般用于多个工程的上游依赖,A->B
466
+ 当A工程执行完毕后, 执行B工程的所有任务
467
+ @param user_token[必填]: tfduck的用户token
468
+ project_names[必填]: 运行的sptask任务(不是任务工程,也不是任务记录)的全名的列表, 比如 ["同步任务_3",...]
469
+ start_time[选填]: 任务的create_time的查询时间范围的开始时间,格式YYYY-MM-DD HH:mm:ss 例如 2022-02-15 14:23:44
470
+ end_time [选填]: 任务的create_time的查询时间范围的结束时间,格式YYYY-MM-DD HH:mm:ss 例如 2022-02-15 18:23:44
471
+ @return:
472
+ 成功---{"s":1, 'value':[{'task__name': '任务名称', 'task__id': 109, 'id': 78, 'state': 3, 'revoke_state': 1},...]}
473
+ 失败---{"s: 非1, 'msg':'失败原因'}
474
+ @example:
475
+ ######
476
+ from tfduck.common.defines import BMOBJ
477
+ BMOBJ.http_api.get_sptasks_info_by_projectname('xxxxx', ['helloworld_end_tf_2_0'], '2022-02-15 14:23:44', '2022-02-15 18:23:44')
478
+ ######
479
+ """
480
+ hresult = self.get_host_name()
481
+ if hresult['s'] == 1:
482
+ host_name = hresult['v']
483
+ else:
484
+ return hresult
364
485
  #
365
- url = f"{host_name}/run_sptasks"
486
+ url = os.path.join(host_name, 'get_sptasks_info_by_projectname')
366
487
  data = {
367
488
  'user_token': user_token,
368
- 'task_names': task_names,
369
- 'task_param': task_param
489
+ 'project_names': json.dumps(project_names),
490
+ 'start_time': start_time,
491
+ 'end_time': end_time,
370
492
  }
371
- res = requests.post(url, data=data, timeout=(5, 10))
493
+ res = requests.post(url, data=data, timeout=(5, 30))
372
494
  result = res.json()
373
495
  return result
374
496