toolslm 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toolslm/__init__.py +1 -1
- toolslm/md_hier.py +113 -16
- {toolslm-0.3.0.dist-info → toolslm-0.3.1.dist-info}/METADATA +1 -1
- toolslm-0.3.1.dist-info/RECORD +13 -0
- toolslm-0.3.0.dist-info/RECORD +0 -13
- {toolslm-0.3.0.dist-info → toolslm-0.3.1.dist-info}/WHEEL +0 -0
- {toolslm-0.3.0.dist-info → toolslm-0.3.1.dist-info}/entry_points.txt +0 -0
- {toolslm-0.3.0.dist-info → toolslm-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {toolslm-0.3.0.dist-info → toolslm-0.3.1.dist-info}/top_level.txt +0 -0
toolslm/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.3.0"
|
|
1
|
+
__version__ = "0.3.1"
|
toolslm/md_hier.py
CHANGED
|
@@ -2,8 +2,11 @@ import re
|
|
|
2
2
|
from fastcore.utils import *
|
|
3
3
|
__all__ = ['markdown_to_dict', 'create_heading_dict']
|
|
4
4
|
|
|
5
|
-
def markdown_to_dict(markdown_content):
|
|
6
|
-
|
|
5
|
+
def markdown_to_dict(
|
|
6
|
+
markdown_content:str # Markdown text including headings
|
|
7
|
+
)->AttrDict: # Dictionary with dot-separated hierarchical keys and content values
|
|
8
|
+
"Parse markdown content into a hierarchical dictionary with dot-separated keys."
|
|
9
|
+
def clean_heading(text): return re.sub(r'[.]+', '', text).strip() # Only remove dots (key separator)
|
|
7
10
|
|
|
8
11
|
lines = markdown_content.splitlines()
|
|
9
12
|
headings = []
|
|
@@ -13,7 +16,6 @@ def markdown_to_dict(markdown_content):
|
|
|
13
16
|
for idx, line in enumerate(lines):
|
|
14
17
|
# Toggle code block state when encountering fence
|
|
15
18
|
if line.strip().startswith('```'): in_code_block = not in_code_block
|
|
16
|
-
|
|
17
19
|
# Only detect headings when not in a code block
|
|
18
20
|
if in_code_block: continue
|
|
19
21
|
match = re.match(r'^(#{1,6})\s*(.*)', line)
|
|
@@ -35,6 +37,9 @@ def markdown_to_dict(markdown_content):
|
|
|
35
37
|
|
|
36
38
|
# Build the dictionary with hierarchical keys
|
|
37
39
|
result,stack = {},[]
|
|
40
|
+
if not headings:
|
|
41
|
+
return dict2obj(result)
|
|
42
|
+
|
|
38
43
|
first_level = headings[0]['level']
|
|
39
44
|
for h in headings:
|
|
40
45
|
stack = stack[:h['level'] - first_level] + [clean_heading(h['text'])]
|
|
@@ -42,23 +47,28 @@ def markdown_to_dict(markdown_content):
|
|
|
42
47
|
result[key] = h['content']
|
|
43
48
|
return dict2obj(result)
|
|
44
49
|
|
|
45
|
-
def create_heading_dict(text):
|
|
46
|
-
|
|
50
|
+
def create_heading_dict(text, rm_fenced=True):
|
|
51
|
+
"Create a nested dictionary structure from markdown headings."
|
|
52
|
+
if rm_fenced: text = re.sub(r'```[\s\S]*?```', '', text)
|
|
47
53
|
headings = re.findall(r'^#+.*', text, flags=re.MULTILINE)
|
|
48
54
|
result = {}
|
|
49
55
|
stack = [result]
|
|
50
|
-
|
|
56
|
+
stack_levels = [0] # Track the level at each stack position
|
|
51
57
|
|
|
52
58
|
for heading in headings:
|
|
53
59
|
level = heading.count('#')
|
|
54
60
|
title = heading.strip('#').strip()
|
|
55
|
-
|
|
61
|
+
|
|
62
|
+
# Pop stack until we find the right parent level
|
|
63
|
+
while len(stack) > 1 and stack_levels[-1] >= level:
|
|
56
64
|
stack.pop()
|
|
57
|
-
|
|
65
|
+
stack_levels.pop()
|
|
66
|
+
|
|
58
67
|
new_dict = {}
|
|
59
68
|
stack[-1][title] = new_dict
|
|
60
69
|
stack.append(new_dict)
|
|
61
|
-
|
|
70
|
+
stack_levels.append(level)
|
|
71
|
+
|
|
62
72
|
return dict2obj(result)
|
|
63
73
|
|
|
64
74
|
|
|
@@ -102,8 +112,8 @@ Admin users management.
|
|
|
102
112
|
def test_special_characters():
|
|
103
113
|
md_content = "# Heading *With* Special _Characters_!\nContent under heading."
|
|
104
114
|
result = markdown_to_dict(md_content)
|
|
105
|
-
assert 'Heading With Special Characters' in result
|
|
106
|
-
assert result['Heading With Special Characters'] == '# Heading *With* Special _Characters_!\nContent under heading.'
|
|
115
|
+
assert 'Heading *With* Special _Characters_!' in result
|
|
116
|
+
assert result['Heading *With* Special _Characters_!'] == '# Heading *With* Special _Characters_!\nContent under heading.'
|
|
107
117
|
|
|
108
118
|
def test_duplicate_headings():
|
|
109
119
|
md_content = "# Duplicate\n## Duplicate\n### Duplicate\nContent under duplicate headings."
|
|
@@ -141,8 +151,8 @@ Admin users management.
|
|
|
141
151
|
assert 'Sib 2' in result
|
|
142
152
|
assert 'Sib 3' in result
|
|
143
153
|
assert 'Sib 4' in result
|
|
144
|
-
assert 'Sib 5' in result
|
|
145
|
-
|
|
154
|
+
assert "Sib 5'" in result # Note the apostrophe is preserved
|
|
155
|
+
|
|
146
156
|
def test_code_chunks_escaped():
|
|
147
157
|
md_content = "# Parent\nParent content.\n## Child\nChild content.\n```python\n# Code comment\nprint('Hello, world!')\n```"
|
|
148
158
|
result = markdown_to_dict(md_content)
|
|
@@ -159,7 +169,7 @@ Admin users management.
|
|
|
159
169
|
test_code_chunks_escaped()
|
|
160
170
|
print('tests passed')
|
|
161
171
|
|
|
162
|
-
def test_nested_headings():
|
|
172
|
+
def test_nested_headings():
|
|
163
173
|
md_content = "# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content."
|
|
164
174
|
result = create_heading_dict(md_content)
|
|
165
175
|
assert 'Child' in result['Parent']
|
|
@@ -169,7 +179,94 @@ Admin users management.
|
|
|
169
179
|
md_content = "# Parent\nParent content.\n## Child\nChild content.\n```python\n# Code comment\nprint('Hello, world!')\n```"
|
|
170
180
|
result = create_heading_dict(md_content)
|
|
171
181
|
assert 'Code comment' not in result
|
|
172
|
-
|
|
182
|
+
|
|
173
183
|
test_nested_headings()
|
|
174
184
|
test_code_chunks_escaped()
|
|
175
|
-
|
|
185
|
+
|
|
186
|
+
def test_multiple_h1s():
|
|
187
|
+
md_content = "# First H1\n# Second H1\n# Third H1"
|
|
188
|
+
result = create_heading_dict(md_content)
|
|
189
|
+
assert 'First H1' in result
|
|
190
|
+
assert 'Second H1' in result
|
|
191
|
+
assert 'Third H1' in result
|
|
192
|
+
assert result['First H1'] == {}
|
|
193
|
+
assert result['Second H1'] == {}
|
|
194
|
+
assert result['Third H1'] == {}
|
|
195
|
+
|
|
196
|
+
def test_skip_levels_down():
|
|
197
|
+
md_content = "# Root\n## Level2\n#### Level4"
|
|
198
|
+
result = create_heading_dict(md_content)
|
|
199
|
+
assert 'Root' in result
|
|
200
|
+
assert 'Level2' in result['Root']
|
|
201
|
+
assert 'Level4' in result['Root']['Level2']
|
|
202
|
+
|
|
203
|
+
def test_skip_levels_up():
|
|
204
|
+
md_content = "# Root\n#### Deep\n## Back to 2"
|
|
205
|
+
result = create_heading_dict(md_content)
|
|
206
|
+
assert 'Root' in result
|
|
207
|
+
assert 'Deep' in result['Root']
|
|
208
|
+
assert 'Back to 2' in result['Root']
|
|
209
|
+
assert result['Root']['Deep'] == {}
|
|
210
|
+
assert result['Root']['Back to 2'] == {}
|
|
211
|
+
|
|
212
|
+
def test_non_h1_start():
|
|
213
|
+
md_content = "### Starting at 3\n## Going to 2\n# Finally 1"
|
|
214
|
+
result = create_heading_dict(md_content)
|
|
215
|
+
assert 'Starting at 3' in result
|
|
216
|
+
assert 'Going to 2' in result
|
|
217
|
+
assert 'Finally 1' in result
|
|
218
|
+
|
|
219
|
+
test_multiple_h1s()
|
|
220
|
+
test_skip_levels_down()
|
|
221
|
+
test_skip_levels_up()
|
|
222
|
+
test_non_h1_start()
|
|
223
|
+
|
|
224
|
+
# Critical edge case tests
|
|
225
|
+
def test_empty_input():
|
|
226
|
+
result = markdown_to_dict("")
|
|
227
|
+
assert result == {}
|
|
228
|
+
result = create_heading_dict("")
|
|
229
|
+
assert result == {}
|
|
230
|
+
|
|
231
|
+
def test_whitespace_only():
|
|
232
|
+
result = markdown_to_dict(" \n\t \n ")
|
|
233
|
+
assert result == {}
|
|
234
|
+
result = create_heading_dict(" \n\t \n ")
|
|
235
|
+
assert result == {}
|
|
236
|
+
|
|
237
|
+
def test_malformed_headings():
|
|
238
|
+
# No space after # (actually works - regex allows it)
|
|
239
|
+
md_content = "#NoSpace\n###AlsoNoSpace\nContent"
|
|
240
|
+
result = markdown_to_dict(md_content)
|
|
241
|
+
assert 'NoSpace' in result
|
|
242
|
+
assert 'NoSpace.AlsoNoSpace' in result
|
|
243
|
+
|
|
244
|
+
# Too many #s (matches max 6, extra # preserved in text)
|
|
245
|
+
md_content = "####### Too Many\nContent"
|
|
246
|
+
result = markdown_to_dict(md_content)
|
|
247
|
+
assert '# Too Many' in result # Extra # now preserved in heading text
|
|
248
|
+
|
|
249
|
+
# Empty heading (actually creates empty key)
|
|
250
|
+
md_content = "## \nContent after empty heading"
|
|
251
|
+
result = markdown_to_dict(md_content)
|
|
252
|
+
assert '' in result # Empty heading creates empty key
|
|
253
|
+
|
|
254
|
+
def test_unicode_and_emojis():
|
|
255
|
+
# Unicode characters
|
|
256
|
+
md_content = "# Café & Naïve\nContent with unicode\n## 中文标题\nChinese content"
|
|
257
|
+
result = markdown_to_dict(md_content)
|
|
258
|
+
assert 'Café & Naïve' in result
|
|
259
|
+
assert 'Café & Naïve.中文标题' in result
|
|
260
|
+
|
|
261
|
+
# Emojis
|
|
262
|
+
md_content = "# 🚀 Rocket Heading\nRocket content\n## 💻 Computer\nComputer content"
|
|
263
|
+
result = markdown_to_dict(md_content)
|
|
264
|
+
assert '🚀 Rocket Heading' in result
|
|
265
|
+
assert '🚀 Rocket Heading.💻 Computer' in result
|
|
266
|
+
|
|
267
|
+
test_empty_input()
|
|
268
|
+
test_whitespace_only()
|
|
269
|
+
test_malformed_headings()
|
|
270
|
+
test_unicode_and_emojis()
|
|
271
|
+
print('tests passed')
|
|
272
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
toolslm/__init__.py,sha256=r4xAFihOf72W9TD-lpMi6ntWSTKTP2SlzKP1ytkjRbI,22
|
|
2
|
+
toolslm/_modidx.py,sha256=-D-B5o30VGs11gBKf96lpADVXnZhdiVEshJpLzmUnDs,4378
|
|
3
|
+
toolslm/download.py,sha256=g3BxUSxylC_575M7RFSJ1GI3Co3EwPDdEeWzxaf2Czk,4451
|
|
4
|
+
toolslm/funccall.py,sha256=7nPfbcvDRMWiVKBKMLlCOMInoUJgDs5e38ef2T7QBHY,8485
|
|
5
|
+
toolslm/md_hier.py,sha256=qvPjS3eRGcf4COnrGhdzqRF5_LGUqnu7LWixOy_280E,10076
|
|
6
|
+
toolslm/shell.py,sha256=dGInuRKvexu21VmtZkw_0S3BGiTsbAongUG-yG4YHpc,1566
|
|
7
|
+
toolslm/xml.py,sha256=D665Nk7NzyZlXyXrpnIRqfK2xQ-6Gf0bCSgocjF7zik,4061
|
|
8
|
+
toolslm-0.3.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
9
|
+
toolslm-0.3.1.dist-info/METADATA,sha256=2-AJ1GSzVATnoJ6XHCAMp85oidTS-zbxS0vH6jJfIRE,2404
|
|
10
|
+
toolslm-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
+
toolslm-0.3.1.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
|
|
12
|
+
toolslm-0.3.1.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
|
|
13
|
+
toolslm-0.3.1.dist-info/RECORD,,
|
toolslm-0.3.0.dist-info/RECORD
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
toolslm/__init__.py,sha256=VrXpHDu3erkzwl_WXrqINBm9xWkcyUy53IQOj042dOs,22
|
|
2
|
-
toolslm/_modidx.py,sha256=-D-B5o30VGs11gBKf96lpADVXnZhdiVEshJpLzmUnDs,4378
|
|
3
|
-
toolslm/download.py,sha256=g3BxUSxylC_575M7RFSJ1GI3Co3EwPDdEeWzxaf2Czk,4451
|
|
4
|
-
toolslm/funccall.py,sha256=7nPfbcvDRMWiVKBKMLlCOMInoUJgDs5e38ef2T7QBHY,8485
|
|
5
|
-
toolslm/md_hier.py,sha256=4uC12443tPBduYJgIZZIcEat2VG0x7JYC8-SwDdS2JY,6360
|
|
6
|
-
toolslm/shell.py,sha256=dGInuRKvexu21VmtZkw_0S3BGiTsbAongUG-yG4YHpc,1566
|
|
7
|
-
toolslm/xml.py,sha256=D665Nk7NzyZlXyXrpnIRqfK2xQ-6Gf0bCSgocjF7zik,4061
|
|
8
|
-
toolslm-0.3.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
9
|
-
toolslm-0.3.0.dist-info/METADATA,sha256=JmN3o1_BAvUgUWi7q8j8uxpweyQzG6qTMt2u_NgASdU,2404
|
|
10
|
-
toolslm-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
-
toolslm-0.3.0.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
|
|
12
|
-
toolslm-0.3.0.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
|
|
13
|
-
toolslm-0.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|