upplib 3.4.5__py3-none-any.whl → 3.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
upplib/text_to_text.py CHANGED
@@ -1,31 +1,86 @@
1
+ from collections import OrderedDict
1
2
  from upplib import *
2
3
  from upplib.common_package import *
3
4
 
4
5
 
5
6
  def to_list_list(list_data: list[str] = None,
6
- count: int = None) -> list[list[str]]:
7
+ count: int = None,
8
+ sep_list_word: str = None) -> list[list[str]]:
7
9
  """
8
10
  将 list 切分成 list(list)
9
11
  组成一个 list(list) 数据
10
12
  多行空行,自动合并到一行空行
13
+
14
+ 使用方式:
15
+ 1. 情况1:
16
+ - 传入 count > 0 时, 按固定数量切分:
17
+ to_list_list(list_data, 4)
18
+ 2. 情况2:
19
+ - sep_list_word = 'THIS_LINE_IS_TITLE'
20
+ 不传 count 或 count 为 None/0 时,
21
+ 按行首包含 'THIS_LINE_IS_TITLE' 的标题进行分组:
22
+ 所有相同标题(如 'THIS_LINE_IS_TITLE7.1')及其后内容
23
+ 会被合并到同一个子 list 中, 并按照标题首次出现的顺序返回。
11
24
  :param list_data: 原始 list 数据
12
- :param count: 每个 list(list) 数据 包含的元素数量
13
- :return:
25
+ :param count: 每个 list(list) 数据 包含的元素数量; 不传则按标题拆分
26
+ :return: list(list[str])
14
27
  """
28
+ if list_data is None:
29
+ list_data = []
30
+
31
+ # 兼容旧逻辑: 传入 count 时按数量切分
15
32
  if count is not None and count > 0:
16
- if list_data is None:
17
- list_data = []
18
- r_list = []
19
- o_list = []
33
+ r_list: list[list[str]] = []
34
+ o_list: list[str] = []
20
35
  c = 0
21
- for i in range(len(list_data)):
22
- o_list.append(list_data[i])
36
+ for item in list_data:
37
+ o_list.append(item)
23
38
  c += 1
24
39
  if c == count:
25
40
  r_list.append(o_list)
26
41
  o_list = []
27
42
  c = 0
28
- if len(o_list):
43
+ if o_list:
29
44
  r_list.append(o_list)
30
45
  return r_list
46
+
47
+ # 新逻辑: 按标题拆分
48
+ # 期望输入示例:
49
+ # 阿斯顿发sas地方0
50
+ # THIS_LINE_IS_TITLE7.1 (重点)技术创新含义、分
51
+ # 阿斯顿发地方1
52
+ # ...
53
+ # THIS_LINE_IS_TITLE7.2 (重点)技术创新含义、分
54
+ # 阿斯顿发地方111
55
+ # ...
56
+ #
57
+ # 输出示例(概念上):
58
+ # [
59
+ # [所有与 THIS_LINE_IS_TITLE7.1 相关的行...],
60
+ # [所有与 THIS_LINE_IS_TITLE7.2 相关的行...]
61
+ # ]
62
+ if sep_list_word is not None:
63
+ groups: "OrderedDict[str, list[str]]" = OrderedDict()
64
+ current_key: str | None = None
65
+ for line in list_data:
66
+ if not isinstance(line, str):
67
+ # 非字符串, 直接跳过
68
+ continue
69
+ # 识别标题行: 以 THIS_LINE_IS_TITLE 开头
70
+ stripped = line.strip()
71
+ if stripped.startswith(sep_list_word):
72
+ # 取到第一个空格之前作为分组 key:
73
+ # "THIS_LINE_IS_TITLE7.2 (重点)技术创新含义、分"
74
+ # -> "THIS_LINE_IS_TITLE7.2"
75
+ first_part = "".join([c for c in stripped if c.isdigit()])
76
+ current_key = first_part
77
+ if current_key not in groups:
78
+ groups[current_key] = []
79
+ groups[current_key].append(line)
80
+ else:
81
+ # 非标题行, 如果已经有当前标题, 归到当前标题下;
82
+ # 若还未遇到任何标题(例如开头的杂项行), 直接忽略
83
+ if current_key is not None:
84
+ groups[current_key].append(line)
85
+ return list(groups.values())
31
86
  return []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: upplib
3
- Version: 3.4.5
3
+ Version: 3.4.6
4
4
  Summary: util
5
5
  Author: Luck
6
6
  Author-email: wantwaterfish@gmail.com
@@ -18,10 +18,10 @@ upplib/multi_thread.py,sha256=zOeWG5HGYIzwqiPvef-4ak5bzorf0S3q6Ht8wPit_M0,2705
18
18
  upplib/query_log.py,sha256=BAUAVr3lA_GbDzHNjI5beJPs0ngNmy2gCi5Q6UMdr1s,9238
19
19
  upplib/redis_tool.py,sha256=I1kOqBwfQWVIOAY-hQaaOn1Zrx8BNlK83u-pk4uHPCA,707
20
20
  upplib/text_to_file.py,sha256=6EMfq9yB5FXSijKNetVSH83bhb0Ipzpu6QIoSwOONHQ,14876
21
- upplib/text_to_text.py,sha256=T4vvl_QF5s5-m87gzyvggUtXU1gJn7_4DNAarTAyKEI,869
21
+ upplib/text_to_text.py,sha256=yG-uasz48mFTY0s7Nyu6_HZ2VXD8e1Nwezoeax2naMI,3194
22
22
  upplib/util.py,sha256=K9OAjsbR2R3k0WWPXWWCphxYc25wbPHrW75pweGUSW4,4671
23
- upplib-3.4.5.dist-info/licenses/LICENSE,sha256=WI5JtXXhjcqnIcPllDA1ZtuxNnZ515xjElcILo7z28o,1073
24
- upplib-3.4.5.dist-info/METADATA,sha256=1Gd6lmOuSWsYhWx5FtESyhoefqiEn6q20YRFKQqacSM,940
25
- upplib-3.4.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
26
- upplib-3.4.5.dist-info/top_level.txt,sha256=VwdHDDPP79e1LqtRu5_w30hHB4gT0zlj1weuQYOqFoA,7
27
- upplib-3.4.5.dist-info/RECORD,,
23
+ upplib-3.4.6.dist-info/licenses/LICENSE,sha256=WI5JtXXhjcqnIcPllDA1ZtuxNnZ515xjElcILo7z28o,1073
24
+ upplib-3.4.6.dist-info/METADATA,sha256=TE6rmys6H5ZKib_qnY6Ei9pyBIyf5fJ57_gSUuhxk7g,940
25
+ upplib-3.4.6.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
26
+ upplib-3.4.6.dist-info/top_level.txt,sha256=VwdHDDPP79e1LqtRu5_w30hHB4gT0zlj1weuQYOqFoA,7
27
+ upplib-3.4.6.dist-info/RECORD,,
File without changes