toolslm 0.3.1__tar.gz → 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {toolslm-0.3.1/toolslm.egg-info → toolslm-0.3.3}/PKG-INFO +1 -1
- {toolslm-0.3.1 → toolslm-0.3.3}/settings.ini +1 -1
- toolslm-0.3.3/toolslm/__init__.py +1 -0
- {toolslm-0.3.1 → toolslm-0.3.3}/toolslm/_modidx.py +6 -1
- {toolslm-0.3.1 → toolslm-0.3.3}/toolslm/funccall.py +3 -1
- {toolslm-0.3.1 → toolslm-0.3.3}/toolslm/md_hier.py +101 -95
- {toolslm-0.3.1 → toolslm-0.3.3}/toolslm/xml.py +46 -4
- {toolslm-0.3.1 → toolslm-0.3.3/toolslm.egg-info}/PKG-INFO +1 -1
- toolslm-0.3.1/toolslm/__init__.py +0 -1
- {toolslm-0.3.1 → toolslm-0.3.3}/LICENSE +0 -0
- {toolslm-0.3.1 → toolslm-0.3.3}/MANIFEST.in +0 -0
- {toolslm-0.3.1 → toolslm-0.3.3}/README.md +0 -0
- {toolslm-0.3.1 → toolslm-0.3.3}/pyproject.toml +0 -0
- {toolslm-0.3.1 → toolslm-0.3.3}/setup.cfg +0 -0
- {toolslm-0.3.1 → toolslm-0.3.3}/setup.py +0 -0
- {toolslm-0.3.1 → toolslm-0.3.3}/toolslm/download.py +0 -0
- {toolslm-0.3.1 → toolslm-0.3.3}/toolslm/shell.py +0 -0
- {toolslm-0.3.1 → toolslm-0.3.3}/toolslm.egg-info/SOURCES.txt +0 -0
- {toolslm-0.3.1 → toolslm-0.3.3}/toolslm.egg-info/dependency_links.txt +0 -0
- {toolslm-0.3.1 → toolslm-0.3.3}/toolslm.egg-info/entry_points.txt +0 -0
- {toolslm-0.3.1 → toolslm-0.3.3}/toolslm.egg-info/not-zip-safe +0 -0
- {toolslm-0.3.1 → toolslm-0.3.3}/toolslm.egg-info/requires.txt +0 -0
- {toolslm-0.3.1 → toolslm-0.3.3}/toolslm.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.3.3"
|
|
@@ -35,10 +35,15 @@ d = { 'settings': { 'branch': 'main',
|
|
|
35
35
|
'toolslm/shell.py'),
|
|
36
36
|
'toolslm.shell.get_shell': ('shell.html#get_shell', 'toolslm/shell.py')},
|
|
37
37
|
'toolslm.xml': { 'toolslm.xml._add_nls': ('xml.html#_add_nls', 'toolslm/xml.py'),
|
|
38
|
+
'toolslm.xml.cell2out': ('xml.html#cell2out', 'toolslm/xml.py'),
|
|
39
|
+
'toolslm.xml.cell2xml': ('xml.html#cell2xml', 'toolslm/xml.py'),
|
|
38
40
|
'toolslm.xml.docs_xml': ('xml.html#docs_xml', 'toolslm/xml.py'),
|
|
39
41
|
'toolslm.xml.files2ctx': ('xml.html#files2ctx', 'toolslm/xml.py'),
|
|
40
42
|
'toolslm.xml.folder2ctx': ('xml.html#folder2ctx', 'toolslm/xml.py'),
|
|
41
43
|
'toolslm.xml.folder2ctx_cli': ('xml.html#folder2ctx_cli', 'toolslm/xml.py'),
|
|
44
|
+
'toolslm.xml.get_mime_text': ('xml.html#get_mime_text', 'toolslm/xml.py'),
|
|
42
45
|
'toolslm.xml.json_to_xml': ('xml.html#json_to_xml', 'toolslm/xml.py'),
|
|
43
46
|
'toolslm.xml.mk_doc': ('xml.html#mk_doc', 'toolslm/xml.py'),
|
|
44
|
-
'toolslm.xml.mk_doctype': ('xml.html#mk_doctype', 'toolslm/xml.py')
|
|
47
|
+
'toolslm.xml.mk_doctype': ('xml.html#mk_doctype', 'toolslm/xml.py'),
|
|
48
|
+
'toolslm.xml.nb2xml': ('xml.html#nb2xml', 'toolslm/xml.py'),
|
|
49
|
+
'toolslm.xml.read_file': ('xml.html#read_file', 'toolslm/xml.py')}}}
|
|
@@ -107,7 +107,9 @@ def _get_nested_schema(obj):
|
|
|
107
107
|
if n != 'return' and n != 'self':
|
|
108
108
|
_process_property(n, o, props, req, defs)
|
|
109
109
|
|
|
110
|
-
|
|
110
|
+
tkw = {}
|
|
111
|
+
if isinstance(obj, type): tkw['title']=obj.__name__
|
|
112
|
+
schema = dict(type='object', properties=props, **tkw)
|
|
111
113
|
if req: schema['required'] = list(req)
|
|
112
114
|
if defs: schema['$defs'] = defs
|
|
113
115
|
return schema
|
|
@@ -1,75 +1,63 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from fastcore.utils import *
|
|
3
|
-
__all__ = ['
|
|
3
|
+
__all__ = ['create_heading_dict', 'HeadingDict']
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
5
|
+
class HeadingDict(dict):
|
|
6
|
+
"""A dictionary-like object that also stores the markdown text content."""
|
|
7
|
+
def __init__(self, text="", *args, **kwargs):
|
|
8
|
+
super().__init__(*args, **kwargs)
|
|
9
|
+
self.text = text
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
|
|
12
|
+
def create_heading_dict(text, rm_fenced=True):
|
|
13
|
+
"Create a nested dictionary structure from markdown headings."
|
|
14
|
+
original_text = text
|
|
15
|
+
original_lines = text.splitlines()
|
|
16
|
+
|
|
17
|
+
# Use fenced-removed text only for finding headings
|
|
18
|
+
text_for_headings = text
|
|
19
|
+
if rm_fenced: text_for_headings = re.sub(r'```[\s\S]*?```', '', text)
|
|
20
|
+
|
|
21
|
+
lines_for_headings = text_for_headings.splitlines()
|
|
12
22
|
headings = []
|
|
13
|
-
in_code_block = False
|
|
14
23
|
|
|
15
24
|
# Parse headings with their levels and line numbers
|
|
16
|
-
for idx, line in enumerate(
|
|
17
|
-
|
|
18
|
-
if line.strip().startswith('```'): in_code_block = not in_code_block
|
|
19
|
-
# Only detect headings when not in a code block
|
|
20
|
-
if in_code_block: continue
|
|
21
|
-
match = re.match(r'^(#{1,6})\s*(.*)', line)
|
|
25
|
+
for idx, line in enumerate(lines_for_headings):
|
|
26
|
+
match = re.match(r'^(#{1,6})\s+\S.*', line)
|
|
22
27
|
if match:
|
|
23
28
|
level = len(match.group(1))
|
|
24
|
-
|
|
25
|
-
headings.append({'level': level, '
|
|
29
|
+
title = line.strip('#').strip()
|
|
30
|
+
headings.append({'level': level, 'title': title, 'line': idx})
|
|
26
31
|
|
|
27
|
-
# Assign content to each heading
|
|
32
|
+
# Assign text content to each heading using original lines
|
|
28
33
|
for i, h in enumerate(headings):
|
|
29
|
-
start = h['line']
|
|
34
|
+
start = h['line']
|
|
30
35
|
# Find the end index: next heading of same or higher level
|
|
31
36
|
for j in range(i + 1, len(headings)):
|
|
32
37
|
if headings[j]['level'] <= h['level']:
|
|
33
38
|
end = headings[j]['line']
|
|
34
39
|
break
|
|
35
|
-
else:
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
# Build the dictionary with hierarchical keys
|
|
39
|
-
result,stack = {},[]
|
|
40
|
-
if not headings:
|
|
41
|
-
return dict2obj(result)
|
|
40
|
+
else:
|
|
41
|
+
end = len(original_lines)
|
|
42
|
+
h['content'] = '\n'.join(original_lines[start:end]).strip()
|
|
42
43
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
stack = stack[:h['level'] - first_level] + [clean_heading(h['text'])]
|
|
46
|
-
key = '.'.join(stack)
|
|
47
|
-
result[key] = h['content']
|
|
48
|
-
return dict2obj(result)
|
|
49
|
-
|
|
50
|
-
def create_heading_dict(text, rm_fenced=True):
|
|
51
|
-
"Create a nested dictionary structure from markdown headings."
|
|
52
|
-
if rm_fenced: text = re.sub(r'```[\s\S]*?```', '', text)
|
|
53
|
-
headings = re.findall(r'^#+.*', text, flags=re.MULTILINE)
|
|
54
|
-
result = {}
|
|
44
|
+
# Build the nested structure
|
|
45
|
+
result = HeadingDict(original_text)
|
|
55
46
|
stack = [result]
|
|
56
|
-
stack_levels = [0]
|
|
57
|
-
|
|
58
|
-
for heading in headings:
|
|
59
|
-
level = heading.count('#')
|
|
60
|
-
title = heading.strip('#').strip()
|
|
47
|
+
stack_levels = [0]
|
|
61
48
|
|
|
49
|
+
for h in headings:
|
|
62
50
|
# Pop stack until we find the right parent level
|
|
63
|
-
while len(stack) > 1 and stack_levels[-1] >= level:
|
|
51
|
+
while len(stack) > 1 and stack_levels[-1] >= h['level']:
|
|
64
52
|
stack.pop()
|
|
65
53
|
stack_levels.pop()
|
|
66
54
|
|
|
67
|
-
new_dict =
|
|
68
|
-
stack[-1][title] = new_dict
|
|
55
|
+
new_dict = HeadingDict(h['content'])
|
|
56
|
+
stack[-1][h['title']] = new_dict
|
|
69
57
|
stack.append(new_dict)
|
|
70
|
-
stack_levels.append(level)
|
|
58
|
+
stack_levels.append(h['level'])
|
|
71
59
|
|
|
72
|
-
return
|
|
60
|
+
return result
|
|
73
61
|
|
|
74
62
|
|
|
75
63
|
if __name__=='__main__':
|
|
@@ -101,63 +89,71 @@ Admin section.
|
|
|
101
89
|
Admin users management.
|
|
102
90
|
"""
|
|
103
91
|
|
|
104
|
-
result =
|
|
92
|
+
result = create_heading_dict(md_content)
|
|
105
93
|
#for key, value in result.items(): print(f'Key: {key}\nValue:\n{value}\n{"-"*40}')
|
|
106
94
|
|
|
107
95
|
def test_empty_content():
|
|
108
96
|
md_content = "# Empty Heading"
|
|
109
|
-
result =
|
|
110
|
-
assert
|
|
97
|
+
result = create_heading_dict(md_content)
|
|
98
|
+
assert 'Empty Heading' in result
|
|
99
|
+
assert result['Empty Heading'].text == '# Empty Heading'
|
|
100
|
+
assert result.text == md_content
|
|
111
101
|
|
|
112
102
|
def test_special_characters():
|
|
113
103
|
md_content = "# Heading *With* Special _Characters_!\nContent under heading."
|
|
114
|
-
result =
|
|
104
|
+
result = create_heading_dict(md_content)
|
|
115
105
|
assert 'Heading *With* Special _Characters_!' in result
|
|
116
|
-
assert result['Heading *With* Special _Characters_!'] == '# Heading *With* Special _Characters_!\nContent under heading.'
|
|
106
|
+
assert result['Heading *With* Special _Characters_!'].text == '# Heading *With* Special _Characters_!\nContent under heading.'
|
|
107
|
+
assert result.text == md_content
|
|
117
108
|
|
|
118
109
|
def test_duplicate_headings():
|
|
119
110
|
md_content = "# Duplicate\n## Duplicate\n### Duplicate\nContent under duplicate headings."
|
|
120
|
-
result =
|
|
111
|
+
result = create_heading_dict(md_content)
|
|
121
112
|
assert 'Duplicate' in result
|
|
122
|
-
assert 'Duplicate
|
|
123
|
-
assert 'Duplicate
|
|
124
|
-
assert result['Duplicate
|
|
113
|
+
assert 'Duplicate' in result['Duplicate']
|
|
114
|
+
assert 'Duplicate' in result['Duplicate']['Duplicate']
|
|
115
|
+
assert result['Duplicate']['Duplicate']['Duplicate'].text == '### Duplicate\nContent under duplicate headings.'
|
|
116
|
+
assert result.text == md_content
|
|
125
117
|
|
|
126
118
|
def test_no_content():
|
|
127
119
|
md_content = "# No Content Heading\n## Subheading"
|
|
128
|
-
result =
|
|
129
|
-
assert result['No Content Heading'] == '# No Content Heading\n## Subheading'
|
|
130
|
-
assert result['No Content Heading
|
|
120
|
+
result = create_heading_dict(md_content)
|
|
121
|
+
assert result['No Content Heading'].text == '# No Content Heading\n## Subheading'
|
|
122
|
+
assert result['No Content Heading']['Subheading'].text == '## Subheading'
|
|
123
|
+
assert result.text == md_content
|
|
131
124
|
|
|
132
125
|
def test_different_levels():
|
|
133
126
|
md_content = "### Level 3 Heading\nContent at level 3.\n# Level 1 Heading\nContent at level 1."
|
|
134
|
-
result =
|
|
127
|
+
result = create_heading_dict(md_content)
|
|
135
128
|
assert 'Level 3 Heading' in result
|
|
136
129
|
assert 'Level 1 Heading' in result
|
|
137
|
-
assert result['Level 3 Heading'] == '### Level 3 Heading\nContent at level 3.'
|
|
138
|
-
assert result['Level 1 Heading'] == '# Level 1 Heading\nContent at level 1.'
|
|
130
|
+
assert result['Level 3 Heading'].text == '### Level 3 Heading\nContent at level 3.'
|
|
131
|
+
assert result['Level 1 Heading'].text == '# Level 1 Heading\nContent at level 1.'
|
|
132
|
+
assert result.text == md_content
|
|
139
133
|
|
|
140
134
|
def test_parent_includes_subheadings():
|
|
141
135
|
md_content = "# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content."
|
|
142
|
-
result =
|
|
143
|
-
assert result['Parent'] == '# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content.'
|
|
144
|
-
assert result['Parent
|
|
145
|
-
assert result['Parent
|
|
136
|
+
result = create_heading_dict(md_content)
|
|
137
|
+
assert result['Parent'].text == '# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content.'
|
|
138
|
+
assert result['Parent']['Child'].text == '## Child\nChild content.\n### Grandchild\nGrandchild content.'
|
|
139
|
+
assert result['Parent']['Child']['Grandchild'].text == '### Grandchild\nGrandchild content.'
|
|
140
|
+
assert result.text == md_content
|
|
146
141
|
|
|
147
142
|
def test_multiple_level2_siblings():
|
|
148
|
-
md_content = "##Sib 1\n##Sib 2\n##Sib 3\n##Sib 4\n##Sib 5'"
|
|
149
|
-
result =
|
|
143
|
+
md_content = "## Sib 1\n## Sib 2\n## Sib 3\n## Sib 4\n## Sib 5'"
|
|
144
|
+
result = create_heading_dict(md_content)
|
|
150
145
|
assert 'Sib 1' in result
|
|
151
146
|
assert 'Sib 2' in result
|
|
152
147
|
assert 'Sib 3' in result
|
|
153
148
|
assert 'Sib 4' in result
|
|
154
|
-
assert "Sib 5'" in result
|
|
149
|
+
assert "Sib 5'" in result
|
|
150
|
+
assert result.text == md_content
|
|
155
151
|
|
|
156
152
|
def test_code_chunks_escaped():
|
|
157
153
|
md_content = "# Parent\nParent content.\n## Child\nChild content.\n```python\n# Code comment\nprint('Hello, world!')\n```"
|
|
158
|
-
result =
|
|
159
|
-
assert 'Code comment' not in result
|
|
160
|
-
assert
|
|
154
|
+
result = create_heading_dict(md_content)
|
|
155
|
+
assert 'Code comment' not in str(result)
|
|
156
|
+
assert result.text == md_content
|
|
161
157
|
|
|
162
158
|
test_empty_content()
|
|
163
159
|
test_special_characters()
|
|
@@ -180,8 +176,28 @@ Admin users management.
|
|
|
180
176
|
result = create_heading_dict(md_content)
|
|
181
177
|
assert 'Code comment' not in result
|
|
182
178
|
|
|
179
|
+
def test_fenced_blocks_preserved_in_text():
|
|
180
|
+
md_content = """# Section
|
|
181
|
+
Content before code.
|
|
182
|
+
|
|
183
|
+
```python
|
|
184
|
+
# This heading should be ignored for structure
|
|
185
|
+
def hello():
|
|
186
|
+
print("Hello, world!")
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
More content after code."""
|
|
190
|
+
result = create_heading_dict(md_content)
|
|
191
|
+
# Fenced code should be preserved in text content
|
|
192
|
+
assert '```python' in result['Section'].text
|
|
193
|
+
assert 'def hello():' in result['Section'].text
|
|
194
|
+
assert '```' in result['Section'].text
|
|
195
|
+
# But headings inside fenced blocks should not create structure
|
|
196
|
+
assert 'This heading should be ignored for structure' not in result['Section']
|
|
197
|
+
|
|
183
198
|
test_nested_headings()
|
|
184
199
|
test_code_chunks_escaped()
|
|
200
|
+
test_fenced_blocks_preserved_in_text()
|
|
185
201
|
|
|
186
202
|
def test_multiple_h1s():
|
|
187
203
|
md_content = "# First H1\n# Second H1\n# Third H1"
|
|
@@ -221,48 +237,38 @@ Admin users management.
|
|
|
221
237
|
test_skip_levels_up()
|
|
222
238
|
test_non_h1_start()
|
|
223
239
|
|
|
224
|
-
#
|
|
240
|
+
# Edge case tests
|
|
225
241
|
def test_empty_input():
|
|
226
|
-
result = markdown_to_dict("")
|
|
227
|
-
assert result == {}
|
|
228
242
|
result = create_heading_dict("")
|
|
229
243
|
assert result == {}
|
|
244
|
+
assert result.text == ""
|
|
230
245
|
|
|
231
246
|
def test_whitespace_only():
|
|
232
|
-
result = markdown_to_dict(" \n\t \n ")
|
|
233
|
-
assert result == {}
|
|
234
247
|
result = create_heading_dict(" \n\t \n ")
|
|
235
248
|
assert result == {}
|
|
249
|
+
assert result.text == " \n\t \n "
|
|
236
250
|
|
|
237
251
|
def test_malformed_headings():
|
|
238
|
-
#
|
|
239
|
-
md_content = "#NoSpace\n###AlsoNoSpace\nContent"
|
|
240
|
-
result = markdown_to_dict(md_content)
|
|
241
|
-
assert 'NoSpace' in result
|
|
242
|
-
assert 'NoSpace.AlsoNoSpace' in result
|
|
243
|
-
|
|
244
|
-
# Too many #s (matches max 6, extra # preserved in text)
|
|
252
|
+
# Too many #s (matches max 6)
|
|
245
253
|
md_content = "####### Too Many\nContent"
|
|
246
|
-
result =
|
|
247
|
-
assert '
|
|
248
|
-
|
|
249
|
-
# Empty heading (actually creates empty key)
|
|
250
|
-
md_content = "## \nContent after empty heading"
|
|
251
|
-
result = markdown_to_dict(md_content)
|
|
252
|
-
assert '' in result # Empty heading creates empty key
|
|
254
|
+
result = create_heading_dict(md_content)
|
|
255
|
+
assert 'Too Many' not in result
|
|
256
|
+
assert result.text == md_content
|
|
253
257
|
|
|
254
258
|
def test_unicode_and_emojis():
|
|
255
259
|
# Unicode characters
|
|
256
260
|
md_content = "# Café & Naïve\nContent with unicode\n## 中文标题\nChinese content"
|
|
257
|
-
result =
|
|
261
|
+
result = create_heading_dict(md_content)
|
|
258
262
|
assert 'Café & Naïve' in result
|
|
259
|
-
assert 'Café & Naïve
|
|
263
|
+
assert '中文标题' in result['Café & Naïve']
|
|
264
|
+
assert result.text == md_content
|
|
260
265
|
|
|
261
266
|
# Emojis
|
|
262
267
|
md_content = "# 🚀 Rocket Heading\nRocket content\n## 💻 Computer\nComputer content"
|
|
263
|
-
result =
|
|
268
|
+
result = create_heading_dict(md_content)
|
|
264
269
|
assert '🚀 Rocket Heading' in result
|
|
265
|
-
assert '
|
|
270
|
+
assert '💻 Computer' in result['🚀 Rocket Heading']
|
|
271
|
+
assert result.text == md_content
|
|
266
272
|
|
|
267
273
|
test_empty_input()
|
|
268
274
|
test_whitespace_only()
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../00_xml.ipynb.
|
|
2
2
|
|
|
3
3
|
# %% auto 0
|
|
4
|
-
__all__ = ['doctype', 'json_to_xml', '
|
|
4
|
+
__all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', 'nb2xml', 'mk_doctype', 'mk_doc', 'docs_xml',
|
|
5
|
+
'read_file', 'files2ctx', 'folder2ctx', 'folder2ctx_cli']
|
|
5
6
|
|
|
6
7
|
# %% ../00_xml.ipynb
|
|
7
8
|
import hashlib,xml.etree.ElementTree as ET
|
|
@@ -10,7 +11,7 @@ from collections import namedtuple
|
|
|
10
11
|
from fastcore.utils import *
|
|
11
12
|
from fastcore.meta import delegates
|
|
12
13
|
from fastcore.xtras import hl_md
|
|
13
|
-
from fastcore.xml import to_xml, Document, Documents, Document_content, Src
|
|
14
|
+
from fastcore.xml import to_xml, Document, Documents, Document_content, Src, Source,Out,Outs,Cell
|
|
14
15
|
from fastcore.script import call_parse
|
|
15
16
|
try: from IPython import display
|
|
16
17
|
except: display=None
|
|
@@ -31,6 +32,39 @@ def json_to_xml(d:dict, # JSON dictionary to convert
|
|
|
31
32
|
ET.indent(root)
|
|
32
33
|
return ET.tostring(root, encoding='unicode')
|
|
33
34
|
|
|
35
|
+
# %% ../00_xml.ipynb
|
|
36
|
+
def get_mime_text(data):
|
|
37
|
+
"Get text from MIME bundle, preferring markdown over plain"
|
|
38
|
+
if 'text/markdown' in data: return ''.join(list(data['text/markdown']))
|
|
39
|
+
if 'text/plain' in data: return ''.join(list(data['text/plain']))
|
|
40
|
+
|
|
41
|
+
# %% ../00_xml.ipynb
|
|
42
|
+
def cell2out(o):
|
|
43
|
+
"Convert single notebook output to XML format"
|
|
44
|
+
if hasattr(o, 'data'):
|
|
45
|
+
txt = get_mime_text(o.data)
|
|
46
|
+
if txt: return Out(txt, mime='markdown' if 'text/markdown' in o.data else 'plain')
|
|
47
|
+
if hasattr(o, 'text'):
|
|
48
|
+
txt = o.text if isinstance(o.text, str) else ''.join(o.text)
|
|
49
|
+
return Out(txt, type='stream', name=o.get('name', 'stdout'))
|
|
50
|
+
if hasattr(o, 'ename'): return Out(f"{o.ename}: {o.evalue}", type='error')
|
|
51
|
+
|
|
52
|
+
# %% ../00_xml.ipynb
|
|
53
|
+
def cell2xml(cell):
|
|
54
|
+
"Convert notebook cell to concise XML format"
|
|
55
|
+
cts = Source(''.join(cell.source)) if hasattr(cell, 'source') and cell.source else None
|
|
56
|
+
out_items = L(getattr(cell,'outputs',[])).map(cell2out).filter()
|
|
57
|
+
outs = []
|
|
58
|
+
if out_items: outs = Outs(*out_items)
|
|
59
|
+
parts = [p for p in [cts, outs] if p]
|
|
60
|
+
return Cell(*parts, type=cell.cell_type)
|
|
61
|
+
|
|
62
|
+
# %% ../00_xml.ipynb
|
|
63
|
+
def nb2xml(fname):
|
|
64
|
+
nb = dict2obj(fname.read_json())
|
|
65
|
+
cells_xml = [to_xml(cell2xml(c), do_escape=False) for c in nb.cells if c.cell_type in ('code','markdown')]
|
|
66
|
+
return '\n'.join(cells_xml)
|
|
67
|
+
|
|
34
68
|
# %% ../00_xml.ipynb
|
|
35
69
|
doctype = namedtuple('doctype', ['src', 'content'])
|
|
36
70
|
|
|
@@ -73,15 +107,23 @@ def docs_xml(docs:list[str], # The content of each document
|
|
|
73
107
|
if srcs is None: srcs = [None]*len(docs)
|
|
74
108
|
if details is None: details = [{}]*len(docs)
|
|
75
109
|
docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)))
|
|
76
|
-
return pre + to_xml(Documents(docs))
|
|
110
|
+
return pre + to_xml(Documents(docs), do_escape=False)
|
|
111
|
+
|
|
112
|
+
# %% ../00_xml.ipynb
|
|
113
|
+
def read_file(fname):
|
|
114
|
+
"Read file content, converting notebooks to XML if needed"
|
|
115
|
+
fname = Path(fname)
|
|
116
|
+
if fname.suffix == '.ipynb': return nb2xml(fname)
|
|
117
|
+
return fname.read_text()
|
|
77
118
|
|
|
78
119
|
# %% ../00_xml.ipynb
|
|
79
120
|
def files2ctx(
|
|
80
121
|
fnames:list[Union[str,Path]], # List of file names to add to context
|
|
81
122
|
prefix:bool=True # Include Anthropic's suggested prose intro?
|
|
82
123
|
)->str: # XML for LM context
|
|
124
|
+
"Convert files to XML context, handling notebooks"
|
|
83
125
|
fnames = [Path(o) for o in fnames]
|
|
84
|
-
contents = [o
|
|
126
|
+
contents = [read_file(o) for o in fnames]
|
|
85
127
|
return docs_xml(contents, fnames, prefix=prefix)
|
|
86
128
|
|
|
87
129
|
# %% ../00_xml.ipynb
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.3.1"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|