toolslm-0.3.0-py3-none-any.whl → toolslm-0.3.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toolslm/__init__.py +1 -1
- toolslm/md_hier.py +174 -71
- {toolslm-0.3.0.dist-info → toolslm-0.3.2.dist-info}/METADATA +1 -1
- toolslm-0.3.2.dist-info/RECORD +13 -0
- toolslm-0.3.0.dist-info/RECORD +0 -13
- {toolslm-0.3.0.dist-info → toolslm-0.3.2.dist-info}/WHEEL +0 -0
- {toolslm-0.3.0.dist-info → toolslm-0.3.2.dist-info}/entry_points.txt +0 -0
- {toolslm-0.3.0.dist-info → toolslm-0.3.2.dist-info}/licenses/LICENSE +0 -0
- {toolslm-0.3.0.dist-info → toolslm-0.3.2.dist-info}/top_level.txt +0 -0
toolslm/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.3.0"
|
|
1
|
+
__version__ = "0.3.2"
|
toolslm/md_hier.py
CHANGED
|
@@ -1,65 +1,63 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from fastcore.utils import *
|
|
3
|
-
__all__ = ['
|
|
3
|
+
__all__ = ['create_heading_dict', 'HeadingDict']
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
class HeadingDict(dict):
|
|
6
|
+
"""A dictionary-like object that also stores the markdown text content."""
|
|
7
|
+
def __init__(self, text="", *args, **kwargs):
|
|
8
|
+
super().__init__(*args, **kwargs)
|
|
9
|
+
self.text = text
|
|
7
10
|
|
|
8
|
-
|
|
11
|
+
|
|
12
|
+
def create_heading_dict(text, rm_fenced=True):
|
|
13
|
+
"Create a nested dictionary structure from markdown headings."
|
|
14
|
+
original_text = text
|
|
15
|
+
original_lines = text.splitlines()
|
|
16
|
+
|
|
17
|
+
# Use fenced-removed text only for finding headings
|
|
18
|
+
text_for_headings = text
|
|
19
|
+
if rm_fenced: text_for_headings = re.sub(r'```[\s\S]*?```', '', text)
|
|
20
|
+
|
|
21
|
+
lines_for_headings = text_for_headings.splitlines()
|
|
9
22
|
headings = []
|
|
10
|
-
in_code_block = False
|
|
11
23
|
|
|
12
24
|
# Parse headings with their levels and line numbers
|
|
13
|
-
for idx, line in enumerate(
|
|
14
|
-
|
|
15
|
-
if line.strip().startswith('```'): in_code_block = not in_code_block
|
|
16
|
-
|
|
17
|
-
# Only detect headings when not in a code block
|
|
18
|
-
if in_code_block: continue
|
|
19
|
-
match = re.match(r'^(#{1,6})\s*(.*)', line)
|
|
25
|
+
for idx, line in enumerate(lines_for_headings):
|
|
26
|
+
match = re.match(r'^(#{1,6})\s+\S.*', line)
|
|
20
27
|
if match:
|
|
21
28
|
level = len(match.group(1))
|
|
22
|
-
|
|
23
|
-
headings.append({'level': level, '
|
|
29
|
+
title = line.strip('#').strip()
|
|
30
|
+
headings.append({'level': level, 'title': title, 'line': idx})
|
|
24
31
|
|
|
25
|
-
# Assign content to each heading
|
|
32
|
+
# Assign text content to each heading using original lines
|
|
26
33
|
for i, h in enumerate(headings):
|
|
27
|
-
start = h['line']
|
|
34
|
+
start = h['line']
|
|
28
35
|
# Find the end index: next heading of same or higher level
|
|
29
36
|
for j in range(i + 1, len(headings)):
|
|
30
37
|
if headings[j]['level'] <= h['level']:
|
|
31
38
|
end = headings[j]['line']
|
|
32
39
|
break
|
|
33
|
-
else:
|
|
34
|
-
|
|
40
|
+
else:
|
|
41
|
+
end = len(original_lines)
|
|
42
|
+
h['content'] = '\n'.join(original_lines[start:end]).strip()
|
|
35
43
|
|
|
36
|
-
# Build the
|
|
37
|
-
result
|
|
38
|
-
first_level = headings[0]['level']
|
|
39
|
-
for h in headings:
|
|
40
|
-
stack = stack[:h['level'] - first_level] + [clean_heading(h['text'])]
|
|
41
|
-
key = '.'.join(stack)
|
|
42
|
-
result[key] = h['content']
|
|
43
|
-
return dict2obj(result)
|
|
44
|
-
|
|
45
|
-
def create_heading_dict(text):
|
|
46
|
-
text = re.sub(r'```[\s\S]*?```', '', text)
|
|
47
|
-
headings = re.findall(r'^#+.*', text, flags=re.MULTILINE)
|
|
48
|
-
result = {}
|
|
44
|
+
# Build the nested structure
|
|
45
|
+
result = HeadingDict(original_text)
|
|
49
46
|
stack = [result]
|
|
50
|
-
|
|
47
|
+
stack_levels = [0]
|
|
51
48
|
|
|
52
|
-
for
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
while level <= prev_level:
|
|
49
|
+
for h in headings:
|
|
50
|
+
# Pop stack until we find the right parent level
|
|
51
|
+
while len(stack) > 1 and stack_levels[-1] >= h['level']:
|
|
56
52
|
stack.pop()
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
53
|
+
stack_levels.pop()
|
|
54
|
+
|
|
55
|
+
new_dict = HeadingDict(h['content'])
|
|
56
|
+
stack[-1][h['title']] = new_dict
|
|
60
57
|
stack.append(new_dict)
|
|
61
|
-
|
|
62
|
-
|
|
58
|
+
stack_levels.append(h['level'])
|
|
59
|
+
|
|
60
|
+
return result
|
|
63
61
|
|
|
64
62
|
|
|
65
63
|
if __name__=='__main__':
|
|
@@ -91,63 +89,71 @@ Admin section.
|
|
|
91
89
|
Admin users management.
|
|
92
90
|
"""
|
|
93
91
|
|
|
94
|
-
result =
|
|
92
|
+
result = create_heading_dict(md_content)
|
|
95
93
|
#for key, value in result.items(): print(f'Key: {key}\nValue:\n{value}\n{"-"*40}')
|
|
96
94
|
|
|
97
95
|
def test_empty_content():
|
|
98
96
|
md_content = "# Empty Heading"
|
|
99
|
-
result =
|
|
100
|
-
assert
|
|
97
|
+
result = create_heading_dict(md_content)
|
|
98
|
+
assert 'Empty Heading' in result
|
|
99
|
+
assert result['Empty Heading'].text == '# Empty Heading'
|
|
100
|
+
assert result.text == md_content
|
|
101
101
|
|
|
102
102
|
def test_special_characters():
|
|
103
103
|
md_content = "# Heading *With* Special _Characters_!\nContent under heading."
|
|
104
|
-
result =
|
|
105
|
-
assert 'Heading With Special
|
|
106
|
-
assert result['Heading With Special
|
|
104
|
+
result = create_heading_dict(md_content)
|
|
105
|
+
assert 'Heading *With* Special _Characters_!' in result
|
|
106
|
+
assert result['Heading *With* Special _Characters_!'].text == '# Heading *With* Special _Characters_!\nContent under heading.'
|
|
107
|
+
assert result.text == md_content
|
|
107
108
|
|
|
108
109
|
def test_duplicate_headings():
|
|
109
110
|
md_content = "# Duplicate\n## Duplicate\n### Duplicate\nContent under duplicate headings."
|
|
110
|
-
result =
|
|
111
|
+
result = create_heading_dict(md_content)
|
|
111
112
|
assert 'Duplicate' in result
|
|
112
|
-
assert 'Duplicate
|
|
113
|
-
assert 'Duplicate
|
|
114
|
-
assert result['Duplicate
|
|
113
|
+
assert 'Duplicate' in result['Duplicate']
|
|
114
|
+
assert 'Duplicate' in result['Duplicate']['Duplicate']
|
|
115
|
+
assert result['Duplicate']['Duplicate']['Duplicate'].text == '### Duplicate\nContent under duplicate headings.'
|
|
116
|
+
assert result.text == md_content
|
|
115
117
|
|
|
116
118
|
def test_no_content():
|
|
117
119
|
md_content = "# No Content Heading\n## Subheading"
|
|
118
|
-
result =
|
|
119
|
-
assert result['No Content Heading'] == '# No Content Heading\n## Subheading'
|
|
120
|
-
assert result['No Content Heading
|
|
120
|
+
result = create_heading_dict(md_content)
|
|
121
|
+
assert result['No Content Heading'].text == '# No Content Heading\n## Subheading'
|
|
122
|
+
assert result['No Content Heading']['Subheading'].text == '## Subheading'
|
|
123
|
+
assert result.text == md_content
|
|
121
124
|
|
|
122
125
|
def test_different_levels():
|
|
123
126
|
md_content = "### Level 3 Heading\nContent at level 3.\n# Level 1 Heading\nContent at level 1."
|
|
124
|
-
result =
|
|
127
|
+
result = create_heading_dict(md_content)
|
|
125
128
|
assert 'Level 3 Heading' in result
|
|
126
129
|
assert 'Level 1 Heading' in result
|
|
127
|
-
assert result['Level 3 Heading'] == '### Level 3 Heading\nContent at level 3.'
|
|
128
|
-
assert result['Level 1 Heading'] == '# Level 1 Heading\nContent at level 1.'
|
|
130
|
+
assert result['Level 3 Heading'].text == '### Level 3 Heading\nContent at level 3.'
|
|
131
|
+
assert result['Level 1 Heading'].text == '# Level 1 Heading\nContent at level 1.'
|
|
132
|
+
assert result.text == md_content
|
|
129
133
|
|
|
130
134
|
def test_parent_includes_subheadings():
|
|
131
135
|
md_content = "# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content."
|
|
132
|
-
result =
|
|
133
|
-
assert result['Parent'] == '# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content.'
|
|
134
|
-
assert result['Parent
|
|
135
|
-
assert result['Parent
|
|
136
|
+
result = create_heading_dict(md_content)
|
|
137
|
+
assert result['Parent'].text == '# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content.'
|
|
138
|
+
assert result['Parent']['Child'].text == '## Child\nChild content.\n### Grandchild\nGrandchild content.'
|
|
139
|
+
assert result['Parent']['Child']['Grandchild'].text == '### Grandchild\nGrandchild content.'
|
|
140
|
+
assert result.text == md_content
|
|
136
141
|
|
|
137
142
|
def test_multiple_level2_siblings():
|
|
138
|
-
md_content = "##Sib 1\n##Sib 2\n##Sib 3\n##Sib 4\n##Sib 5'"
|
|
139
|
-
result =
|
|
143
|
+
md_content = "## Sib 1\n## Sib 2\n## Sib 3\n## Sib 4\n## Sib 5'"
|
|
144
|
+
result = create_heading_dict(md_content)
|
|
140
145
|
assert 'Sib 1' in result
|
|
141
146
|
assert 'Sib 2' in result
|
|
142
147
|
assert 'Sib 3' in result
|
|
143
148
|
assert 'Sib 4' in result
|
|
144
|
-
assert
|
|
145
|
-
|
|
149
|
+
assert "Sib 5'" in result
|
|
150
|
+
assert result.text == md_content
|
|
151
|
+
|
|
146
152
|
def test_code_chunks_escaped():
|
|
147
153
|
md_content = "# Parent\nParent content.\n## Child\nChild content.\n```python\n# Code comment\nprint('Hello, world!')\n```"
|
|
148
|
-
result =
|
|
149
|
-
assert 'Code comment' not in result
|
|
150
|
-
assert
|
|
154
|
+
result = create_heading_dict(md_content)
|
|
155
|
+
assert 'Code comment' not in str(result)
|
|
156
|
+
assert result.text == md_content
|
|
151
157
|
|
|
152
158
|
test_empty_content()
|
|
153
159
|
test_special_characters()
|
|
@@ -159,7 +165,7 @@ Admin users management.
|
|
|
159
165
|
test_code_chunks_escaped()
|
|
160
166
|
print('tests passed')
|
|
161
167
|
|
|
162
|
-
def test_nested_headings():
|
|
168
|
+
def test_nested_headings():
|
|
163
169
|
md_content = "# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content."
|
|
164
170
|
result = create_heading_dict(md_content)
|
|
165
171
|
assert 'Child' in result['Parent']
|
|
@@ -169,7 +175,104 @@ Admin users management.
|
|
|
169
175
|
md_content = "# Parent\nParent content.\n## Child\nChild content.\n```python\n# Code comment\nprint('Hello, world!')\n```"
|
|
170
176
|
result = create_heading_dict(md_content)
|
|
171
177
|
assert 'Code comment' not in result
|
|
172
|
-
|
|
178
|
+
|
|
179
|
+
def test_fenced_blocks_preserved_in_text():
|
|
180
|
+
md_content = """# Section
|
|
181
|
+
Content before code.
|
|
182
|
+
|
|
183
|
+
```python
|
|
184
|
+
# This heading should be ignored for structure
|
|
185
|
+
def hello():
|
|
186
|
+
print("Hello, world!")
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
More content after code."""
|
|
190
|
+
result = create_heading_dict(md_content)
|
|
191
|
+
# Fenced code should be preserved in text content
|
|
192
|
+
assert '```python' in result['Section'].text
|
|
193
|
+
assert 'def hello():' in result['Section'].text
|
|
194
|
+
assert '```' in result['Section'].text
|
|
195
|
+
# But headings inside fenced blocks should not create structure
|
|
196
|
+
assert 'This heading should be ignored for structure' not in result['Section']
|
|
197
|
+
|
|
173
198
|
test_nested_headings()
|
|
174
199
|
test_code_chunks_escaped()
|
|
175
|
-
|
|
200
|
+
test_fenced_blocks_preserved_in_text()
|
|
201
|
+
|
|
202
|
+
def test_multiple_h1s():
|
|
203
|
+
md_content = "# First H1\n# Second H1\n# Third H1"
|
|
204
|
+
result = create_heading_dict(md_content)
|
|
205
|
+
assert 'First H1' in result
|
|
206
|
+
assert 'Second H1' in result
|
|
207
|
+
assert 'Third H1' in result
|
|
208
|
+
assert result['First H1'] == {}
|
|
209
|
+
assert result['Second H1'] == {}
|
|
210
|
+
assert result['Third H1'] == {}
|
|
211
|
+
|
|
212
|
+
def test_skip_levels_down():
|
|
213
|
+
md_content = "# Root\n## Level2\n#### Level4"
|
|
214
|
+
result = create_heading_dict(md_content)
|
|
215
|
+
assert 'Root' in result
|
|
216
|
+
assert 'Level2' in result['Root']
|
|
217
|
+
assert 'Level4' in result['Root']['Level2']
|
|
218
|
+
|
|
219
|
+
def test_skip_levels_up():
|
|
220
|
+
md_content = "# Root\n#### Deep\n## Back to 2"
|
|
221
|
+
result = create_heading_dict(md_content)
|
|
222
|
+
assert 'Root' in result
|
|
223
|
+
assert 'Deep' in result['Root']
|
|
224
|
+
assert 'Back to 2' in result['Root']
|
|
225
|
+
assert result['Root']['Deep'] == {}
|
|
226
|
+
assert result['Root']['Back to 2'] == {}
|
|
227
|
+
|
|
228
|
+
def test_non_h1_start():
|
|
229
|
+
md_content = "### Starting at 3\n## Going to 2\n# Finally 1"
|
|
230
|
+
result = create_heading_dict(md_content)
|
|
231
|
+
assert 'Starting at 3' in result
|
|
232
|
+
assert 'Going to 2' in result
|
|
233
|
+
assert 'Finally 1' in result
|
|
234
|
+
|
|
235
|
+
test_multiple_h1s()
|
|
236
|
+
test_skip_levels_down()
|
|
237
|
+
test_skip_levels_up()
|
|
238
|
+
test_non_h1_start()
|
|
239
|
+
|
|
240
|
+
# Edge case tests
|
|
241
|
+
def test_empty_input():
|
|
242
|
+
result = create_heading_dict("")
|
|
243
|
+
assert result == {}
|
|
244
|
+
assert result.text == ""
|
|
245
|
+
|
|
246
|
+
def test_whitespace_only():
|
|
247
|
+
result = create_heading_dict(" \n\t \n ")
|
|
248
|
+
assert result == {}
|
|
249
|
+
assert result.text == " \n\t \n "
|
|
250
|
+
|
|
251
|
+
def test_malformed_headings():
|
|
252
|
+
# Too many #s (matches max 6)
|
|
253
|
+
md_content = "####### Too Many\nContent"
|
|
254
|
+
result = create_heading_dict(md_content)
|
|
255
|
+
assert 'Too Many' not in result
|
|
256
|
+
assert result.text == md_content
|
|
257
|
+
|
|
258
|
+
def test_unicode_and_emojis():
|
|
259
|
+
# Unicode characters
|
|
260
|
+
md_content = "# Café & Naïve\nContent with unicode\n## 中文标题\nChinese content"
|
|
261
|
+
result = create_heading_dict(md_content)
|
|
262
|
+
assert 'Café & Naïve' in result
|
|
263
|
+
assert '中文标题' in result['Café & Naïve']
|
|
264
|
+
assert result.text == md_content
|
|
265
|
+
|
|
266
|
+
# Emojis
|
|
267
|
+
md_content = "# 🚀 Rocket Heading\nRocket content\n## 💻 Computer\nComputer content"
|
|
268
|
+
result = create_heading_dict(md_content)
|
|
269
|
+
assert '🚀 Rocket Heading' in result
|
|
270
|
+
assert '💻 Computer' in result['🚀 Rocket Heading']
|
|
271
|
+
assert result.text == md_content
|
|
272
|
+
|
|
273
|
+
test_empty_input()
|
|
274
|
+
test_whitespace_only()
|
|
275
|
+
test_malformed_headings()
|
|
276
|
+
test_unicode_and_emojis()
|
|
277
|
+
print('tests passed')
|
|
278
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
toolslm/__init__.py,sha256=vNiWJ14r_cw5t_7UDqDQIVZvladKFGyHH2avsLpN7Vg,22
|
|
2
|
+
toolslm/_modidx.py,sha256=-D-B5o30VGs11gBKf96lpADVXnZhdiVEshJpLzmUnDs,4378
|
|
3
|
+
toolslm/download.py,sha256=g3BxUSxylC_575M7RFSJ1GI3Co3EwPDdEeWzxaf2Czk,4451
|
|
4
|
+
toolslm/funccall.py,sha256=7nPfbcvDRMWiVKBKMLlCOMInoUJgDs5e38ef2T7QBHY,8485
|
|
5
|
+
toolslm/md_hier.py,sha256=Havk9Hf0t2Xt67n_r7ZxCsS0pciR85iLcE5quShvkTg,10032
|
|
6
|
+
toolslm/shell.py,sha256=dGInuRKvexu21VmtZkw_0S3BGiTsbAongUG-yG4YHpc,1566
|
|
7
|
+
toolslm/xml.py,sha256=D665Nk7NzyZlXyXrpnIRqfK2xQ-6Gf0bCSgocjF7zik,4061
|
|
8
|
+
toolslm-0.3.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
9
|
+
toolslm-0.3.2.dist-info/METADATA,sha256=5lWEv7BWTwdd5cvXgGsQXqr0j6tk8UIcGpRTlcjV3V4,2404
|
|
10
|
+
toolslm-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
+
toolslm-0.3.2.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
|
|
12
|
+
toolslm-0.3.2.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
|
|
13
|
+
toolslm-0.3.2.dist-info/RECORD,,
|
toolslm-0.3.0.dist-info/RECORD
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
toolslm/__init__.py,sha256=VrXpHDu3erkzwl_WXrqINBm9xWkcyUy53IQOj042dOs,22
|
|
2
|
-
toolslm/_modidx.py,sha256=-D-B5o30VGs11gBKf96lpADVXnZhdiVEshJpLzmUnDs,4378
|
|
3
|
-
toolslm/download.py,sha256=g3BxUSxylC_575M7RFSJ1GI3Co3EwPDdEeWzxaf2Czk,4451
|
|
4
|
-
toolslm/funccall.py,sha256=7nPfbcvDRMWiVKBKMLlCOMInoUJgDs5e38ef2T7QBHY,8485
|
|
5
|
-
toolslm/md_hier.py,sha256=4uC12443tPBduYJgIZZIcEat2VG0x7JYC8-SwDdS2JY,6360
|
|
6
|
-
toolslm/shell.py,sha256=dGInuRKvexu21VmtZkw_0S3BGiTsbAongUG-yG4YHpc,1566
|
|
7
|
-
toolslm/xml.py,sha256=D665Nk7NzyZlXyXrpnIRqfK2xQ-6Gf0bCSgocjF7zik,4061
|
|
8
|
-
toolslm-0.3.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
9
|
-
toolslm-0.3.0.dist-info/METADATA,sha256=JmN3o1_BAvUgUWi7q8j8uxpweyQzG6qTMt2u_NgASdU,2404
|
|
10
|
-
toolslm-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
-
toolslm-0.3.0.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
|
|
12
|
-
toolslm-0.3.0.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
|
|
13
|
-
toolslm-0.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|