tfduck-bsd 0.16.8__tar.gz → 0.17.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tfduck-bsd might be problematic. Click here for more details.

Files changed (46)
  1. {tfduck-bsd-0.16.8/tfduck_bsd.egg-info → tfduck-bsd-0.17.0}/PKG-INFO +1 -1
  2. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/setup.py +1 -1
  3. tfduck-bsd-0.17.0/tfduck/__init__.py +1 -0
  4. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/tga/train_sql_ltv.py +8 -4
  5. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/tga/train_sql_retain.py +8 -4
  6. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/tga/train_sql_yh.py +7 -4
  7. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0/tfduck_bsd.egg-info}/PKG-INFO +1 -1
  8. tfduck-bsd-0.16.8/tfduck/__init__.py +0 -1
  9. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/LICENSE +0 -0
  10. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/README.md +0 -0
  11. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/bin/tfduck +0 -0
  12. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/setup.cfg +0 -0
  13. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/bdp_sdk_py/__init__.py +0 -0
  14. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/bdp_sdk_py/config/__init__.py +0 -0
  15. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/bdp_sdk_py/config/bdpmanager.py +0 -0
  16. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/bdp_sdk_py/config/table_config.py +0 -0
  17. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/bdp_sdk_py/example.py +0 -0
  18. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/bdp_sdk_py/opends/__init__.py +0 -0
  19. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/bdp_sdk_py/opends/opends.py +0 -0
  20. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/bdp_sdk_py/opends/sdk.py +0 -0
  21. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/common/__init__.py +0 -0
  22. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/common/defines.py +0 -0
  23. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/common/extendEncoder.py +0 -0
  24. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/main.py +0 -0
  25. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/oss/__init__.py +0 -0
  26. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/oss/oss.py +0 -0
  27. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/pyspark_k8s/__init__.py +0 -0
  28. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/pyspark_k8s/k8s_manage.py +0 -0
  29. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/pyspark_k8s/spark_manage.py +0 -0
  30. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/s3/__init__.py +0 -0
  31. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/s3/s3oper.py +0 -0
  32. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/sagemaker/__init__.py +0 -0
  33. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/sagemaker/saoper.py +0 -0
  34. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/tga/__init__.py +0 -0
  35. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/tga/base_tga.py +0 -0
  36. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/tga/predict_sql_ltv.py +0 -0
  37. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/tga/predict_sql_retain.py +0 -0
  38. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/tga/predict_sql_yh.py +0 -0
  39. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/tga/tga.py +0 -0
  40. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/tga/tga_test.py +0 -0
  41. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/thinkdata/__init__.py +0 -0
  42. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck/thinkdata/query.py +0 -0
  43. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck_bsd.egg-info/SOURCES.txt +0 -0
  44. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck_bsd.egg-info/dependency_links.txt +0 -0
  45. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck_bsd.egg-info/requires.txt +0 -0
  46. {tfduck-bsd-0.16.8 → tfduck-bsd-0.17.0}/tfduck_bsd.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tfduck-bsd
3
- Version: 0.16.8
3
+ Version: 0.17.0
4
4
  Summary: A small example package
5
5
  Home-page: UNKNOWN
6
6
  Author: yuanxiao
@@ -8,7 +8,7 @@ with open("README.md", "r") as fh:
8
8
 
9
9
  setuptools.setup(
10
10
  name="tfduck-bsd",
11
- version="0.16.8",
11
+ version="0.17.0",
12
12
  author="yuanxiao",
13
13
  author_email="yuan6785@163.com",
14
14
  description="A small example package",
@@ -0,0 +1 @@
1
+ __version__="0.17.0"
@@ -243,6 +243,7 @@ class TrainFeatureSql(BaseTga):
243
243
  SELECT
244
244
  a1.user_register_time as user_register_time,
245
245
  a1."#user_id" as user_user_id,
246
+ a1."#distinct_id" as user_distinct_id,
246
247
  b1."#user_id" as event_user_id,
247
248
  CASE WHEN b1."yiap__itemrevenue" is NULL THEN 0 ELSE b1."yiap__itemrevenue" END yiap__itemrevenue,
248
249
  CASE WHEN b1."sdk_ad_price" is NULL THEN 0 ELSE b1."sdk_ad_price" END sdk_ad_price,
@@ -260,11 +261,11 @@ class TrainFeatureSql(BaseTga):
260
261
  FROM
261
262
  (
262
263
  SELECT
263
- b.event_time_utc as user_register_time, a."#user_id"
264
+ b.event_time_utc as user_register_time, a."#user_id", a."#distinct_id"
264
265
  FROM
265
266
  (
266
267
  SELECT
267
- "#user_id"
268
+ "#user_id","#distinct_id"
268
269
  FROM
269
270
  {gconf.tga_user_table}
270
271
  WHERE
@@ -333,13 +334,15 @@ class TrainFeatureSql(BaseTga):
333
334
  -- (abs(from_ieee754_64(xxhash64(cast(cast(user_user_id as varchar) as varbinary)))) % 100) / 100. as tt_stable_rand
334
335
  -- 直接用user_user_id作为排序值,排序的必须是唯一的,否则下面会对不上,会产生很多null的数据
335
336
  -- user_user_id as tt_stable_rand -- 这种方式最保险,但是不能乱序,这样采样的数据就不是随机分布在每天的
336
- bitwise_xor(user_user_id, 906867964886667264) as tt_stable_rand -- 这种方式可能会产生left null的情况,但是是少数,过滤掉就行,不影响结果,但支持乱序采样
337
+ -- bitwise_xor(user_user_id, 906867964886667264) as tt_stable_rand -- 这种方式可能会产生left null的情况,但是是少数,过滤掉就行,不影响结果,但支持乱序采样
338
+ a3.user_distinct_id as tt_stable_rand -- 这种方式最保险,即是乱序也是唯一
337
339
 
338
340
  FROM
339
341
  (
340
342
  -- 获取指定日期的注册用户 连接 这些用户在7-10天后的触发的事件 的 数量
341
343
  SELECT
342
344
  a2_1.user_user_id,
345
+ a2_1.user_distinct_id,
343
346
  a2_1.user_register_time,
344
347
  {
345
348
  [
@@ -351,6 +354,7 @@ class TrainFeatureSql(BaseTga):
351
354
  FROM (
352
355
  SELECT
353
356
  a2.user_user_id as user_user_id,
357
+ a2.user_distinct_id as user_distinct_id,
354
358
  a2.user_register_time as user_register_time,
355
359
  SUM(
356
360
  CASE
@@ -370,7 +374,7 @@ class TrainFeatureSql(BaseTga):
370
374
  (
371
375
  new_user
372
376
  ) a2
373
- GROUP BY a2.user_user_id, a2.user_register_time
377
+ GROUP BY a2.user_user_id, a2.user_distinct_id, a2.user_register_time
374
378
  ) a2_1
375
379
  ) a3
376
380
  )
@@ -232,6 +232,7 @@ class TrainFeatureSql(BaseTga):
232
232
  SELECT
233
233
  a1.user_register_time as user_register_time,
234
234
  a1."#user_id" as user_user_id,
235
+ a1."#distinct_id" as user_distinct_id,
235
236
  b1."#user_id" as event_user_id,
236
237
  -- b1."event_time_utc" as pd_event_time_utc,
237
238
  floor(to_unixtime(b1.event_time_utc))-floor(to_unixtime(a1.user_register_time)) as pd_rt_dur
@@ -248,11 +249,12 @@ class TrainFeatureSql(BaseTga):
248
249
  FROM
249
250
  (
250
251
  SELECT
251
- b.event_time_utc as user_register_time, a."#user_id"
252
+ b.event_time_utc as user_register_time, a."#user_id", a."#distinct_id"
252
253
  FROM
253
254
  (
254
255
  SELECT
255
- "#user_id"
256
+ "#user_id",
257
+ "#distinct_id"
256
258
  FROM
257
259
  {gconf.tga_user_table}
258
260
  WHERE
@@ -318,12 +320,14 @@ class TrainFeatureSql(BaseTga):
318
320
  -- (abs(from_ieee754_64(xxhash64(cast(cast(user_user_id as varchar) as varbinary)))) % 100) / 100. as tt_stable_rand
319
321
  -- 直接用user_user_id作为排序值,排序的必须是唯一的,否则下面会对不上,会产生很多null的数据
320
322
  -- user_user_id as tt_stable_rand -- 这种方式最保险,但是不能乱序,这样采样的数据就不是随机分布在每天的
321
- bitwise_xor(user_user_id, 906867964886667264) as tt_stable_rand -- 这种方式可能会产生left null的情况,但是是少数,过滤掉就行,不影响结果,但支持乱序采样
323
+ -- bitwise_xor(user_user_id, 906867964886667264) as tt_stable_rand -- 这种方式也不太行,数据会几种在某一天
324
+ a3.user_distinct_id as tt_stable_rand -- 这种方式最保险,即是乱序也是唯一
322
325
  FROM
323
326
  (
324
327
  -- 获取指定日期的注册用户 连接 这些用户在7-10天后的触发的事件 的 数量
325
328
  SELECT
326
329
  a2.user_user_id as user_user_id,
330
+ a2.user_distinct_id as user_distinct_id,
327
331
  a2.user_register_time as user_register_time,
328
332
  SUM(
329
333
  CASE
@@ -336,7 +340,7 @@ class TrainFeatureSql(BaseTga):
336
340
  (
337
341
  new_user
338
342
  ) a2
339
- GROUP BY a2.user_user_id, a2.user_register_time
343
+ GROUP BY a2.user_user_id, a2.user_distinct_id, a2.user_register_time
340
344
  ) a3
341
345
  )
342
346
  , nav_table as (
@@ -232,6 +232,7 @@ class TrainFeatureSql(BaseTga):
232
232
  SELECT
233
233
  a1.user_register_time as user_register_time,
234
234
  a1."#user_id" as user_user_id,
235
+ a1."#distinct_id" as user_distinct_id,
235
236
  b1."#user_id" as event_user_id,
236
237
  -- b1."event_time_utc" as pd_event_time_utc,
237
238
  floor(to_unixtime(b1.event_time_utc))-floor(to_unixtime(a1.user_register_time)) as pd_rt_dur
@@ -248,11 +249,11 @@ class TrainFeatureSql(BaseTga):
248
249
  FROM
249
250
  (
250
251
  SELECT
251
- b.event_time_utc as user_register_time, a."#user_id"
252
+ b.event_time_utc as user_register_time, a."#user_id", a."#distinct_id"
252
253
  FROM
253
254
  (
254
255
  SELECT
255
- "#user_id"
256
+ "#user_id","#distinct_id"
256
257
  FROM
257
258
  {gconf.tga_user_table}
258
259
  WHERE
@@ -322,19 +323,21 @@ class TrainFeatureSql(BaseTga):
322
323
  -- (abs(from_ieee754_64(xxhash64(cast(cast(user_user_id as varchar) as varbinary)))) % 100) / 100. as tt_stable_rand
323
324
  -- 直接用user_user_id作为排序值,排序的必须是唯一的,否则下面会对不上,会产生很多null的数据
324
325
  -- user_user_id as tt_stable_rand -- 这种方式最保险,但是不能乱序,这样采样的数据就不是随机分布在每天的
325
- bitwise_xor(user_user_id, 906867964886667264) as tt_stable_rand -- 这种方式可能会产生left null的情况,但是是少数,过滤掉就行,不影响结果,但支持乱序采样
326
+ --bitwise_xor(user_user_id, 906867964886667264) as tt_stable_rand -- 这种方式可能会产生left null的情况,但是是少数,过滤掉就行,不影响结果,但支持乱序采样
327
+ a3.user_distinct_id as tt_stable_rand -- 这种方式最保险,即是乱序也是唯一
326
328
  FROM
327
329
  (
328
330
  -- 获取指定日期的注册用户 连接 这些用户在7-10天后的触发的事件 的 数量
329
331
  SELECT
330
332
  a2.user_user_id as user_user_id,
333
+ a2.user_distinct_id as user_distinct_id,
331
334
  a2.user_register_time as user_register_time,
332
335
  SUM(CASE WHEN a2.event_user_id IS NULL THEN 0 ELSE 1 END) AS event_count
333
336
  FROM
334
337
  (
335
338
  new_user
336
339
  ) a2
337
- GROUP BY a2.user_user_id, a2.user_register_time
340
+ GROUP BY a2.user_user_id, a2.user_distinct_id, a2.user_register_time
338
341
  ) a3
339
342
  )
340
343
  , nav_table as (
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tfduck-bsd
3
- Version: 0.16.8
3
+ Version: 0.17.0
4
4
  Summary: A small example package
5
5
  Home-page: UNKNOWN
6
6
  Author: yuanxiao
@@ -1 +0,0 @@
1
- __version__="0.16.8"
File without changes
File without changes
File without changes
File without changes
File without changes