tfduck-bsd 0.17.8__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tfduck-bsd might be problematic. Click here for more details.

tfduck/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__="0.17.8"
1
+ __version__="0.18.0"
tfduck/common/defines_clean.py CHANGED
@@ -156,45 +156,42 @@ class BaseMethod(object):
156
156
 
157
157
  def get_record_now(self, ctx={}, tz="UTC"):
158
158
  """
159
- @des: 获取执行任务记录的创建时间,方便dag里面取根据现在时间去执行part_date
159
+ @des: 获取执行任务记录的创建时间,方便dag里面取根据现在时间去执行part_date-----其实这个方法用不到,因为都是在dag里面用,dag用的是defines.BMOBJ,因为这个dag是必须带默认环境的
160
160
  start = arrow.now(tz="Asia/Shanghai")
161
161
  @ return: datetime 带时区
162
162
  """
163
- if self.get_current_env() == "local":
163
+ # 如果是查看任务,而不是任务记录,是不会有这两个参数的
164
+ task_type = ctx.get('task_type', None)
165
+ trid = ctx.get('trid', None)
166
+ if trid is None:
164
167
  root_create_time = arrow.now(tz=tz)
165
168
  else:
166
- # 如果是查看任务,而不是任务记录,是不会有这两个参数的
167
- task_type = ctx.get('task_type', None)
168
- trid = ctx.get('trid', None)
169
- if trid is None:
170
- root_create_time = arrow.now(tz=tz)
171
- else:
172
- Record = None
173
- if task_type == "dtask":
174
- # Record = apps.get_model("dtask", 'RunRecord')
175
- app_name = "dtask"
176
- model_name = "RunRecord"
177
- elif task_type == "retask":
178
- # Record = apps.get_model("retask", 'RETaskRecord')
179
- app_name = "retask"
180
- model_name = "RETaskRecord"
181
- elif task_type == "sptask":
182
- # Record = apps.get_model("sptask", 'DPTaskRecord')
183
- app_name = "sptask"
184
- model_name = "DPTaskRecord"
185
- if Record is not None:
186
- # obj = Record.objects.get(id=trid)
187
- objs = self.http_api.get_model_data(app_name, model_name, {"id": trid}, ["extra", "create_time"])
188
- #
189
- obj = objs[0]
190
- obj['extras'] = json.loads(obj['extra'])
191
- #
192
- root_create_time = obj['extras'].get("root_create_time", None)
193
- if root_create_time is None: # 如果拿不到,那么obj肯定是根节点
194
- root_create_time = arrow.get(obj['create_time']).to(tz)
195
- else: # 读取extra的信息,即根节点
196
- # root_create_time带有时区信息,可以直接get
197
- root_create_time = arrow.get(root_create_time).to(tz)
169
+ # Record = None
170
+ if task_type == "dtask":
171
+ # Record = apps.get_model("dtask", 'RunRecord')
172
+ app_name = "dtask"
173
+ model_name = "RunRecord"
174
+ elif task_type == "retask":
175
+ # Record = apps.get_model("retask", 'RETaskRecord')
176
+ app_name = "retask"
177
+ model_name = "RETaskRecord"
178
+ elif task_type == "sptask":
179
+ # Record = apps.get_model("sptask", 'DPTaskRecord')
180
+ app_name = "sptask"
181
+ model_name = "DPTaskRecord"
182
+ if app_name is not None:
183
+ # obj = Record.objects.get(id=trid)
184
+ objs = self.http_api.get_model_data(app_name, model_name, {"id": trid}, ["extra", "create_time"])
185
+ #
186
+ obj = objs[0]
187
+ obj['extras'] = json.loads(obj['extra'])
188
+ #
189
+ root_create_time = obj['extras'].get("root_create_time", None)
190
+ if root_create_time is None: # 如果拿不到,那么obj肯定是根节点
191
+ root_create_time = arrow.get(obj['create_time']).to(tz)
192
+ else: # 读取extra的信息,即根节点
193
+ # root_create_time带有时区信息,可以直接get
194
+ root_create_time = arrow.get(root_create_time).to(tz)
198
195
  # 返回
199
196
  return root_create_time.datetime
200
197
 
@@ -247,6 +244,27 @@ class BaseMethod(object):
247
244
  result.append(str(e2))
248
245
  return "\n".join(result)
249
246
 
247
+ def logerr(self, e):
248
+ errorMeg = ''
249
+ try:
250
+ for file, lineno, function, text in traceback.extract_tb(sys.exc_info()[2]):
251
+ errorMeg += '%s\n%s, in %s\n%s: %s!' % (
252
+ str(e), file, function, lineno, text)
253
+ self.log_error("error"+"*"*50)
254
+ for error in errorMeg.split("\n"):
255
+ self.log_error(error, "error")
256
+ try:
257
+ error_self = "".join(traceback.format_exception_only(type(e), e)) # 捕获错误本身,这样才能捕获到compile里面的错误
258
+ self.log_error(error_self, "error")
259
+ except:
260
+ pass
261
+ try:
262
+ self.log_error(getattr(e, '_msg', 'exception'))
263
+ except Exception as e1:
264
+ self.log_error(getattr(e, 'msg', 'exception'))
265
+ except Exception as e2:
266
+ self.log_error(e2)
267
+
250
268
  def get_log_str(self, *msgs):
251
269
  try:
252
270
  msg_str = " ".join([str(msg) for msg in msgs])
@@ -283,6 +301,8 @@ class Dj44HttpApi(object):
283
301
  self.user_token = ''
284
302
 
285
303
  def get_user_token(self):
304
+ if self.user_token == '':
305
+ raise Et(2, "请先设置user_token")
286
306
  return self.user_token
287
307
 
288
308
  def set_user_token(self, user_token):
@@ -291,7 +311,12 @@ class Dj44HttpApi(object):
291
311
 
292
312
  def get_host_name(self):
293
313
  # 后面可以改成从配置中心获取
294
- return {'s': 1, 'v': "http://tfduck.163py.com"}
314
+ PROJECT_ENV = os.environ.get("PROJECT_ENV", "dev")
315
+ if PROJECT_ENV == "dev": # 本地测试
316
+ endpoint_url = "http://localhost:8000"
317
+ else:
318
+ endpoint_url = "http://tfduck.163py.com" # 后面可以改成从配置中心获取
319
+ return {'s': 1, 'v': endpoint_url}
295
320
 
296
321
  # 旧的接口也从tfduck的外部接口调用
297
322
 
tfduck/tga/tga.py CHANGED
@@ -72,19 +72,19 @@ class ThinkDataQuery(BaseTga):
72
72
  mode=mode, header=header)
73
73
  return True
74
74
 
75
- def get_data_csv_i(self, ctx, unique_path, sql, block_size=100000, print_size=100000, read_timeout=600, upcount=None):
75
+ def get_data_csv_i(self, ctx, unique_path, sql, block_size=100000, print_size=100000, read_timeout=600, upcount=None, conn_timeout=30, tga_data_timeout=600):
76
76
  """
77
77
  @des:从thinkdata的openapi获取数据----流式,为了节省内存---配合下面的getquerycsv
78
78
  """
79
79
  session = requests.session()
80
- post_data = {'token': self.token, 'sql': sql}
80
+ post_data = {'token': self.token, 'sql': sql, 'timeoutSeconds': tga_data_timeout}
81
81
  #
82
82
  unique_path = self.gen_local_unique_file()
83
83
  #
84
84
  BMOBJ.log_error("in query")
85
85
  #
86
86
  r = session.post(self.query_uri, data=post_data, stream=True,
87
- verify=False, timeout=(30, read_timeout))
87
+ verify=False, timeout=(conn_timeout, read_timeout))
88
88
  datas = []
89
89
  i = 0 # 循环引用计数
90
90
  icount = 0 # 数据的数量
@@ -150,7 +150,7 @@ class ThinkDataQuery(BaseTga):
150
150
  pass
151
151
  return unique_path
152
152
 
153
- def get_data_csv(self, ctx, sql, block_size=100000, print_size=100000, read_timeout=600, upcount=None, retry_count=2):
153
+ def get_data_csv(self, ctx, sql, block_size=100000, print_size=100000, read_timeout=600, upcount=None, retry_count=2, conn_timeout=30, tga_data_timeout=600):
154
154
  """
155
155
  @des:从thinkdata的openapi获取数据----流式,为了节省内存---配合下面的getquerycsv
156
156
  """
@@ -160,7 +160,7 @@ class ThinkDataQuery(BaseTga):
160
160
  for i in range(retry_count):
161
161
  try:
162
162
  result = self.get_data_csv_i(
163
- ctx, unique_path, sql, block_size, print_size, read_timeout, upcount)
163
+ ctx, unique_path, sql, block_size, print_size, read_timeout, upcount, conn_timeout, tga_data_timeout)
164
164
  return result
165
165
  except Exception as e:
166
166
  gol_e = e
@@ -174,15 +174,15 @@ class ThinkDataQuery(BaseTga):
174
174
  if gol_e is not None:
175
175
  raise gol_e
176
176
 
177
- def get_data_raw_pyhive(self, ctx, sql, block_size=100000, fetch_size=10000, retry_count=2, read_timeout=300, upcount=None, print_size=100000):
177
+ def get_data_raw_pyhive(self, ctx, sql, block_size=100000, fetch_size=10000, retry_count=2, read_timeout=300, upcount=None, print_size=100000, conn_timeout=30, tga_data_timeout=600):
178
178
  """
179
179
  @des: 接口装饰器--修改为get_data_csv,防止全面修改代码
180
180
  """
181
- result = self.get_data_csv(ctx, sql, block_size, print_size, read_timeout, upcount, retry_count)
181
+ result = self.get_data_csv(ctx, sql, block_size, print_size, read_timeout, upcount, retry_count, conn_timeout, tga_data_timeout)
182
182
  return result
183
183
 
184
184
 
185
- def get_data_raw_pyhive_bck(self, ctx, sql, block_size=100000, fetch_size=10000, retry_count=2, read_timeout=300, upcount=None, print_size=100000):
185
+ def get_data_raw_pyhive_bck(self, ctx, sql, block_size=100000, fetch_size=10000, retry_count=2, read_timeout=300, upcount=None, print_size=100000, conn_timeout=30, tga_data_timeout=600):
186
186
  '''
187
187
  @des:presto直连方式读取-----重试的方式----当get_data_csv接口出问题,则启用这个接口
188
188
  tobj = ThinkDataQuery("http://queryhost:port/querySql", "查询token",
@@ -196,7 +196,7 @@ class ThinkDataQuery(BaseTga):
196
196
  for i in range(retry_count):
197
197
  try:
198
198
  result = self.get_data_raw_pyhive_i(
199
- ctx, unique_path, sql, block_size, fetch_size, read_timeout, upcount, print_size)
199
+ ctx, unique_path, sql, block_size, fetch_size, read_timeout, upcount, print_size, conn_timeout)
200
200
  return result
201
201
  except Exception as e:
202
202
  gol_e = e
@@ -210,7 +210,7 @@ class ThinkDataQuery(BaseTga):
210
210
  if gol_e is not None:
211
211
  raise gol_e
212
212
 
213
- def get_data_raw_pyhive_i(self, ctx, unique_path, sql, block_size=100000, fetch_size=10000, read_timeout=300, upcount=None, print_size=100000):
213
+ def get_data_raw_pyhive_i(self, ctx, unique_path, sql, block_size=100000, fetch_size=10000, read_timeout=300, upcount=None, print_size=100000, conn_timeout=30):
214
214
  '''
215
215
  @des: 内部调用
216
216
  '''
@@ -235,7 +235,7 @@ class ThinkDataQuery(BaseTga):
235
235
  # 这里stream为true和false没有关系,fetchmany每次都会通过request_session传nexturl重新get获取数据
236
236
  # 参考pyhive/presto.py的_fetch_more,每次fetchmany其实是多次fetchone
237
237
  requests_kwargs={"timeout": (
238
- 30, read_timeout), "stream": True, "verify": False}
238
+ conn_timeout, read_timeout), "stream": True, "verify": False}
239
239
  )
240
240
  cursor = conn.cursor()
241
241
  cursor.execute(sql)
@@ -303,3 +303,59 @@ class ThinkDataQuery(BaseTga):
303
303
  except:
304
304
  pass
305
305
  return unique_path
306
+
307
+ """
308
+ 数据打入接口--start--
309
+ """
310
+
311
+ def set_tga_user_data(self, tga_app_no, sec_token, url, data, is_set_once=False):
312
+ """
313
+ @des: 用户数据打入tga
314
+ @params:
315
+ tga_app_no: tga项目的id,注意不是app_id
316
+ sec_token: 安全的二次部署服务器的token
317
+ url: 二次部署的服务器打入的url
318
+ data: 数据 [['tga用户的distinct_id,为空就传None', 'tga用户的account_id为空就传None', '打入的用户属性为一个dict' ], ...,]
319
+ 例如: [['eqw31231231', 'fads21321312312', {'a':1, 'b':2}], ...,]
320
+ """
321
+ pass
322
+
323
+ def set_tga_event_data_trac(self, tga_app_no, sec_token, url, data):
324
+ """
325
+ @des: 普通事件数据打入tga
326
+ @params:
327
+ tga_app_no: tga项目的id,注意不是app_id
328
+ sec_token: 安全的二次部署服务器的token
329
+ url: 二次部署的服务器打入的url
330
+ data: 数据 [['事件名称', 'tga用户的distinct_id,为空就传None', 'tga用户的account_id为空就传None', '打入的用户属性为一个dict' ], ...,]
331
+ 例如: [['new_session', 'eqw31231231', 'fads21321312312', {'a':1, 'b':2}], ...,]
332
+ """
333
+ pass
334
+
335
+ def set_tga_event_data_trac_update(self, tga_app_no, sec_token, url, data):
336
+ """
337
+ @des: 可更新事件数据打入tga, 重写部分数据
338
+ @params:
339
+ tga_app_no: tga项目的id,注意不是app_id
340
+ sec_token: 安全的二次部署服务器的token
341
+ url: 二次部署的服务器打入的url
342
+ data: 数据 [['事件名称', '事件唯一id', 'tga用户的distinct_id,为空就传None', 'tga用户的account_id为空就传None', , '打入的用户属性为一个dict' ], ...,]
343
+ 例如: [['new_session', 'event_id_123', 'eqw31231231', 'fads21321312312', {'a':1, 'b':2}], ...,]
344
+ """
345
+ pass
346
+
347
+ def set_tga_event_data_trac_overwrite(self, tga_app_no, sec_token, url, data):
348
+ """
349
+ @des: 可更新事件数据打入tga, 重写全部数据
350
+ @params:
351
+ tga_app_no: tga项目的id,注意不是app_id
352
+ sec_token: 安全的二次部署服务器的token
353
+ url: 二次部署的服务器打入的url
354
+ data: 数据 [['事件名称', '事件唯一id', 'tga用户的distinct_id,为空就传None', 'tga用户的account_id为空就传None', , '打入的用户属性为一个dict' ], ...,]
355
+ 例如: [['new_session', 'event_id_123', 'eqw31231231', 'fads21321312312', {'a':1, 'b':2}], ...,]
356
+ """
357
+ pass
358
+
359
+ """
360
+ 数据打入接口----end-----
361
+ """
tfduck/tga/train_sql_ltv.py CHANGED
@@ -335,7 +335,7 @@ class TrainFeatureSql(BaseTga):
335
335
  -- 直接用user_user_id作为排序值,排序的必须是唯一的,否则下面会对不上,会产生很多null的数据
336
336
  -- user_user_id as tt_stable_rand -- 这种方式最保险,但是不能乱序,这样采样的数据就不是随机分布在每天的
337
337
  -- bitwise_xor(user_user_id, 906867964886667264) as tt_stable_rand -- 这种方式可能会产生left null的情况,但是是少数,过滤掉就行,不影响结果,但支持乱序采样
338
- a3.user_distinct_id as tt_stable_rand -- 这种方式最保险,即是乱序也是唯一
338
+ a3.user_distinct_id as tt_stable_rand -- 这种方式最保险,即是乱序也是唯一, 就算limit分几部分取特征值,这几部分也是可以对应上的
339
339
 
340
340
  FROM
341
341
  (
tfduck/tga/train_sql_retain.py CHANGED
@@ -107,7 +107,7 @@ class TrainFeatureSql(BaseTga):
107
107
  user_cols1='1=1',
108
108
  user_cols2='1=1',
109
109
  event_cols1='1=1',
110
- addon_attrs=[],
110
+ addon_attrs=[],
111
111
  **kwargs):
112
112
  """
113
113
  @des: 参数说明看上面的文档说明
tfduck_bsd-0.17.8.dist-info/METADATA → tfduck_bsd-0.18.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tfduck-bsd
3
- Version: 0.17.8
3
+ Version: 0.18.0
4
4
  Summary: A small example package
5
5
  Home-page: UNKNOWN
6
6
  Author: yuanxiao
@@ -13,6 +13,7 @@ Classifier: Operating System :: OS Independent
13
13
  Requires-Python: >=3.5
14
14
  Description-Content-Type: text/markdown
15
15
  License-File: LICENSE
16
+ Requires-Dist: arrow (>=0.15.5)
16
17
  Requires-Dist: requests (>=2.20.0)
17
18
  Requires-Dist: django (==2.2.12)
18
19
  Requires-Dist: oss2 (==2.15.0)
tfduck_bsd-0.17.8.dist-info/RECORD → tfduck_bsd-0.18.0.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
1
- tfduck/__init__.py,sha256=JJRjv3zZ3ki2vwIQ0GH-QI69Reem5LIvEvqJU3y0eCM,20
1
+ tfduck/__init__.py,sha256=xg0NwcLTzmqN-Xa9iGt2_NsDBIOcmZkITwbvTeLIC6U,20
2
2
  tfduck/main.py,sha256=zNTC16wkwGJ0QX1-i8vzlGophOxmFuO4SLsF1tkjsbE,14670
3
3
  tfduck/bdp_sdk_py/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  tfduck/bdp_sdk_py/example.py,sha256=Xq1_gcSyu0zho_iMmMYfYgMblcgF8a-GwRBWPTw0FuU,2879
@@ -10,7 +10,7 @@ tfduck/bdp_sdk_py/opends/opends.py,sha256=ny2bcB7gCv6aYXrdL2fbGe2kJDQ-hamxunerYq
10
10
  tfduck/bdp_sdk_py/opends/sdk.py,sha256=zagIhg0gQYnGmEnFgfxBwo4TW17GjeVrYaS7Qt-Vtow,19136
11
11
  tfduck/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  tfduck/common/defines.py,sha256=pHhQ35TdB83GWs75hEN-b5qGqLO6Ipf8EfQsHolGZkE,17629
13
- tfduck/common/defines_clean.py,sha256=D5W1t7RCKJoWIp31RYDrhB6E0qENadH5PeSzWdwuThU,19483
13
+ tfduck/common/defines_clean.py,sha256=1U6jNw_9YTTO_EznH6vBavSGsYI6G9i2TnSbVxXlMwk,20642
14
14
  tfduck/common/extendEncoder.py,sha256=k2s_FEJdBDVCx5CU3ADwJGlr7NUkPP9HKu_epZcpiTo,1525
15
15
  tfduck/oss/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  tfduck/oss/oss.py,sha256=bQuEjUKzPdV2MbGd33pzm5z9l50KHXwVW23wOuRJ9NE,19871
@@ -26,16 +26,16 @@ tfduck/tga/base_tga.py,sha256=rg1BHaIKGSwUVbOlWSA3Y1arB8J_caemjTjH9jh8GYM,2184
26
26
  tfduck/tga/predict_sql_ltv.py,sha256=25rpOZdHyMcEU3O8u67oUpLsTiZburEPsvXR38hTUJ0,3589
27
27
  tfduck/tga/predict_sql_retain.py,sha256=Nsl0lSZ_CC8j_GB4jLrJgkoqqDRAAPYwzo7vAOYZ764,19772
28
28
  tfduck/tga/predict_sql_yh.py,sha256=uYeuCZX2btxO-pvjrhlmuG35PquJbLvVtQEuEZHu8Cs,3588
29
- tfduck/tga/tga.py,sha256=_0kDEDLI7TMGkCUqmBO-yMwuEy3bRTXUSIJAHcQgyEc,12654
29
+ tfduck/tga/tga.py,sha256=p9AcaiPiGrHaOvfJGnpzNaFK8OeDL5NbCU770OaOTWc,15605
30
30
  tfduck/tga/tga_test.py,sha256=A3n2LdvgQWlkX6E54K6cnsIUeYrgHlzG3QYbA7ZKgHk,2750
31
- tfduck/tga/train_sql_ltv.py,sha256=hBksB-fukA4Q6LuBo7B93XyFc0VNWyVj8pVNaV8xYYc,26592
32
- tfduck/tga/train_sql_retain.py,sha256=o64RBfnUTJtfnTKBVE4zy_kbEtB7g5YmshyWczbcSe0,24336
31
+ tfduck/tga/train_sql_ltv.py,sha256=VZqGy0FbwLHkjQClV1IIMd6_E_H6BrIqL5p_WUfTUXc,26668
32
+ tfduck/tga/train_sql_retain.py,sha256=AIOJKWC37j4UdM8JLFS6LdJFtABjMq9gOi3xvAs4fAE,24335
33
33
  tfduck/tga/train_sql_yh.py,sha256=nb5BO_vOv0eKY2kOVt5ZOfM1cvfI8j2no8cYpCL_rNE,24378
34
34
  tfduck/thinkdata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
35
  tfduck/thinkdata/query.py,sha256=DsfcxjZrc0ZFTwN2pI5fKdM1Bwr6ageoPcA2MP3r2bE,1314
36
- tfduck_bsd-0.17.8.data/scripts/tfduck,sha256=UsuoAs4peJW4I-e6Gn91gEToP_YyuUp-rUUg3ObKneY,192
37
- tfduck_bsd-0.17.8.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
38
- tfduck_bsd-0.17.8.dist-info/METADATA,sha256=hBLN9umQHLIC--gwZwfdBa_gA-4XVUXx5COIW-Cz3qo,971
39
- tfduck_bsd-0.17.8.dist-info/WHEEL,sha256=g4nMs7d-Xl9-xC9XovUrsDHGXt-FT0E17Yqo92DEfvY,92
40
- tfduck_bsd-0.17.8.dist-info/top_level.txt,sha256=503etRkoyeI1VYcAwe5KpD5Bamhx0R0y2ofkE8HpRDA,7
41
- tfduck_bsd-0.17.8.dist-info/RECORD,,
36
+ tfduck_bsd-0.18.0.data/scripts/tfduck,sha256=UsuoAs4peJW4I-e6Gn91gEToP_YyuUp-rUUg3ObKneY,192
37
+ tfduck_bsd-0.18.0.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
38
+ tfduck_bsd-0.18.0.dist-info/METADATA,sha256=kdiL2okuGWTB1xd643seSd0pXOGoND5IxL5z2PVLIEc,1003
39
+ tfduck_bsd-0.18.0.dist-info/WHEEL,sha256=g4nMs7d-Xl9-xC9XovUrsDHGXt-FT0E17Yqo92DEfvY,92
40
+ tfduck_bsd-0.18.0.dist-info/top_level.txt,sha256=503etRkoyeI1VYcAwe5KpD5Bamhx0R0y2ofkE8HpRDA,7
41
+ tfduck_bsd-0.18.0.dist-info/RECORD,,