toolslm 0.0.6__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toolslm/__init__.py +1 -1
- toolslm/_modidx.py +7 -0
- toolslm/download.py +24 -12
- toolslm/funccall.py +86 -20
- toolslm/md_hier.py +136 -0
- toolslm/xml.py +20 -20
- toolslm-0.1.0.dist-info/METADATA +80 -0
- toolslm-0.1.0.dist-info/RECORD +13 -0
- {toolslm-0.0.6.dist-info → toolslm-0.1.0.dist-info}/WHEEL +1 -1
- toolslm-0.0.6.dist-info/METADATA +0 -154
- toolslm-0.0.6.dist-info/RECORD +0 -12
- {toolslm-0.0.6.dist-info → toolslm-0.1.0.dist-info}/LICENSE +0 -0
- {toolslm-0.0.6.dist-info → toolslm-0.1.0.dist-info}/entry_points.txt +0 -0
- {toolslm-0.0.6.dist-info → toolslm-0.1.0.dist-info}/top_level.txt +0 -0
toolslm/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.0
|
|
1
|
+
__version__ = "0.1.0"
|
toolslm/_modidx.py
CHANGED
|
@@ -15,11 +15,18 @@ d = { 'settings': { 'branch': 'main',
|
|
|
15
15
|
'toolslm.download.read_md': ('download.html#read_md', 'toolslm/download.py'),
|
|
16
16
|
'toolslm.download.split_url': ('download.html#split_url', 'toolslm/download.py')},
|
|
17
17
|
'toolslm.funccall': { 'toolslm.funccall._copy_loc': ('funccall.html#_copy_loc', 'toolslm/funccall.py'),
|
|
18
|
+
'toolslm.funccall._get_nested_schema': ('funccall.html#_get_nested_schema', 'toolslm/funccall.py'),
|
|
19
|
+
'toolslm.funccall._handle_container': ('funccall.html#_handle_container', 'toolslm/funccall.py'),
|
|
20
|
+
'toolslm.funccall._handle_type': ('funccall.html#_handle_type', 'toolslm/funccall.py'),
|
|
18
21
|
'toolslm.funccall._param': ('funccall.html#_param', 'toolslm/funccall.py'),
|
|
22
|
+
'toolslm.funccall._process_property': ('funccall.html#_process_property', 'toolslm/funccall.py'),
|
|
19
23
|
'toolslm.funccall._run': ('funccall.html#_run', 'toolslm/funccall.py'),
|
|
20
24
|
'toolslm.funccall._types': ('funccall.html#_types', 'toolslm/funccall.py'),
|
|
25
|
+
'toolslm.funccall.call_func': ('funccall.html#call_func', 'toolslm/funccall.py'),
|
|
21
26
|
'toolslm.funccall.get_schema': ('funccall.html#get_schema', 'toolslm/funccall.py'),
|
|
27
|
+
'toolslm.funccall.mk_ns': ('funccall.html#mk_ns', 'toolslm/funccall.py'),
|
|
22
28
|
'toolslm.funccall.python': ('funccall.html#python', 'toolslm/funccall.py')},
|
|
29
|
+
'toolslm.md_hier': {},
|
|
23
30
|
'toolslm.shell': { 'toolslm.shell.TerminalInteractiveShell.run_cell': ( 'shell.html#terminalinteractiveshell.run_cell',
|
|
24
31
|
'toolslm/shell.py'),
|
|
25
32
|
'toolslm.shell.get_shell': ('shell.html#get_shell', 'toolslm/shell.py')},
|
toolslm/download.py
CHANGED
|
@@ -36,16 +36,27 @@ def html2md(s:str):
|
|
|
36
36
|
return o.handle(s)
|
|
37
37
|
|
|
38
38
|
# %% ../03_download.ipynb 8
|
|
39
|
-
def read_html(url,
|
|
39
|
+
def read_html(url, # URL to read
|
|
40
|
+
sel=None, # Read only outerHTML of CSS selector `sel`
|
|
41
|
+
rm_comments=True, # Removes HTML comments
|
|
42
|
+
rm_details=True, # Removes `<details>` tags
|
|
43
|
+
multi=False, # Get all matches to `sel` or first one
|
|
44
|
+
wrap_tag=None, #If multi, each selection wrapped with <wrap_tag>content</wrap_tag>
|
|
45
|
+
): # Cleaned markdown
|
|
40
46
|
"Get `url`, optionally selecting CSS selector `sel`, and convert to clean markdown"
|
|
41
47
|
page = get(url).text
|
|
42
48
|
if sel:
|
|
43
49
|
soup = BeautifulSoup(page, 'html.parser')
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
50
|
+
if multi:
|
|
51
|
+
page = [str(el) for el in soup.select(sel)]
|
|
52
|
+
if not wrap_tag: page = "\n".join(page)
|
|
53
|
+
else: page = str(soup.select_one(sel))
|
|
54
|
+
mds = map(lambda x: clean_md(html2md(x), rm_comments, rm_details=rm_details), tuplify(page))
|
|
55
|
+
if wrap_tag: return '\n'.join([f"\n<{wrap_tag}>\n{o}</{wrap_tag}>\n" for o in mds])
|
|
56
|
+
else: return'\n'.join(mds)
|
|
47
57
|
|
|
48
|
-
|
|
58
|
+
|
|
59
|
+
# %% ../03_download.ipynb 12
|
|
49
60
|
def get_llmstxt(url, optional=False, n_workers=None):
|
|
50
61
|
"Get llms.txt file from and expand it with `llms_txt.create_ctx()`"
|
|
51
62
|
if not url.endswith('llms.txt'): return None
|
|
@@ -53,7 +64,7 @@ def get_llmstxt(url, optional=False, n_workers=None):
|
|
|
53
64
|
if resp.status_code!=200: return None
|
|
54
65
|
return create_ctx(resp.text, optional=optional, n_workers=n_workers)
|
|
55
66
|
|
|
56
|
-
# %% ../03_download.ipynb
|
|
67
|
+
# %% ../03_download.ipynb 14
|
|
57
68
|
def split_url(url):
|
|
58
69
|
"Split `url` into base, path, and file name, normalising name to '/' if empty"
|
|
59
70
|
parsed = urlparse(url.strip('/'))
|
|
@@ -63,20 +74,20 @@ def split_url(url):
|
|
|
63
74
|
if not path and not fname: path='/'
|
|
64
75
|
return base,path,fname
|
|
65
76
|
|
|
66
|
-
# %% ../03_download.ipynb
|
|
77
|
+
# %% ../03_download.ipynb 16
|
|
67
78
|
def _tryget(url):
|
|
68
79
|
"Return response from `url` if `status_code!=404`, otherwise `None`"
|
|
69
80
|
res = get(url)
|
|
70
81
|
return None if res.status_code==404 else url
|
|
71
82
|
|
|
72
|
-
# %% ../03_download.ipynb
|
|
83
|
+
# %% ../03_download.ipynb 17
|
|
73
84
|
def find_docs(url):
|
|
74
85
|
"If available, return LLM-friendly llms.txt context or markdown file location from `url`"
|
|
75
86
|
base,path,fname = split_url(url)
|
|
76
87
|
url = (base+path+fname).strip('/')
|
|
77
88
|
if fname=='/llms.txt': return url
|
|
78
89
|
if Path(fname).suffix in('.md', '.txt', '.rst'): return _tryget(url)
|
|
79
|
-
if '.' in fname: return _tryget(url+'.md')
|
|
90
|
+
if '.' in fname: return _tryget(url+'.md') or find_docs(url[:url.rfind('/')])
|
|
80
91
|
res = _tryget(url+'/llms.txt')
|
|
81
92
|
if res: return res
|
|
82
93
|
res = _tryget(url+'/index.md')
|
|
@@ -85,13 +96,14 @@ def find_docs(url):
|
|
|
85
96
|
if res: return res
|
|
86
97
|
res = _tryget(url+'/index-commonmark.md')
|
|
87
98
|
if res: return res
|
|
88
|
-
|
|
99
|
+
parsed_url = urlparse(url)
|
|
100
|
+
if parsed_url.path == '/' or not parsed_url.path: return None
|
|
101
|
+
return find_docs(urljoin(url, '..'))
|
|
89
102
|
|
|
90
|
-
# %% ../03_download.ipynb
|
|
103
|
+
# %% ../03_download.ipynb 22
|
|
91
104
|
def read_docs(url, optional=False, n_workers=None, rm_comments=True, rm_details=True):
|
|
92
105
|
"If available, return LLM-friendly llms.txt context or markdown file response for `url`"
|
|
93
106
|
url = find_docs(url)
|
|
94
|
-
if not url: return
|
|
95
107
|
if url.endswith('/llms.txt'): res = get_llmstxt(url, optional=optional, n_workers=n_workers)
|
|
96
108
|
else: res = get(url).text
|
|
97
109
|
return clean_md(res, rm_comments=rm_comments, rm_details=rm_details)
|
toolslm/funccall.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../01_funccall.ipynb.
|
|
2
2
|
|
|
3
3
|
# %% auto 0
|
|
4
|
-
__all__ = ['empty', 'get_schema', 'python']
|
|
4
|
+
__all__ = ['empty', 'get_schema', 'python', 'mk_ns', 'call_func']
|
|
5
5
|
|
|
6
6
|
# %% ../01_funccall.ipynb 2
|
|
7
7
|
import inspect
|
|
8
|
+
from collections import abc
|
|
8
9
|
from fastcore.utils import *
|
|
9
10
|
from fastcore.docments import docments
|
|
10
11
|
|
|
@@ -17,10 +18,11 @@ def _types(t:type)->tuple[str,Optional[str]]:
|
|
|
17
18
|
if t is empty: raise TypeError('Missing type')
|
|
18
19
|
tmap = {int:"integer", float:"number", str:"string", bool:"boolean", list:"array", dict:"object"}
|
|
19
20
|
tmap.update({k.__name__: v for k, v in tmap.items()})
|
|
20
|
-
if getattr(t, '__origin__', None) in
|
|
21
|
-
|
|
21
|
+
if getattr(t, '__origin__', None) in (list,tuple): return "array", tmap.get(t.__args__[0].__name__, "object")
|
|
22
|
+
elif isinstance(t, str): return tmap.get(t, "object"), None
|
|
23
|
+
else: return tmap.get(t.__name__, "object"), None
|
|
22
24
|
|
|
23
|
-
# %% ../01_funccall.ipynb
|
|
25
|
+
# %% ../01_funccall.ipynb 16
|
|
24
26
|
def _param(name, info):
|
|
25
27
|
"json schema parameter given `name` and `info` from docments full dict."
|
|
26
28
|
paramt,itemt = _types(info.anno)
|
|
@@ -29,28 +31,76 @@ def _param(name, info):
|
|
|
29
31
|
if info.default is not empty: pschema["default"] = info.default
|
|
30
32
|
return pschema
|
|
31
33
|
|
|
32
|
-
# %% ../01_funccall.ipynb
|
|
34
|
+
# %% ../01_funccall.ipynb 19
|
|
35
|
+
def _handle_type(t, defs):
|
|
36
|
+
"Handle a single type, creating nested schemas if necessary"
|
|
37
|
+
if isinstance(t, type) and not issubclass(t, (int, float, str, bool)):
|
|
38
|
+
defs[t.__name__] = _get_nested_schema(t)
|
|
39
|
+
return {'$ref': f'#/$defs/{t.__name__}'}
|
|
40
|
+
return {'type': _types(t)[0]}
|
|
41
|
+
|
|
42
|
+
# %% ../01_funccall.ipynb 20
|
|
43
|
+
def _handle_container(origin, args, defs):
|
|
44
|
+
"Handle container types like dict, list, tuple, set"
|
|
45
|
+
if origin is dict:
|
|
46
|
+
value_type = args[1].__args__[0] if hasattr(args[1], '__args__') else args[1]
|
|
47
|
+
return {
|
|
48
|
+
'type': 'object',
|
|
49
|
+
'additionalProperties': (
|
|
50
|
+
{'type': 'array', 'items': _handle_type(value_type, defs)}
|
|
51
|
+
if hasattr(args[1], '__origin__') else _handle_type(args[1], defs)
|
|
52
|
+
)
|
|
53
|
+
}
|
|
54
|
+
elif origin in (list, tuple, set):
|
|
55
|
+
schema = {'type': 'array', 'items': _handle_type(args[0], defs)}
|
|
56
|
+
if origin is set:
|
|
57
|
+
schema['uniqueItems'] = True
|
|
58
|
+
return schema
|
|
59
|
+
return None
|
|
60
|
+
|
|
61
|
+
# %% ../01_funccall.ipynb 21
|
|
62
|
+
def _process_property(name, obj, props, req, defs):
|
|
63
|
+
"Process a single property of the schema"
|
|
64
|
+
p = _param(name, obj)
|
|
65
|
+
props[name] = p
|
|
66
|
+
if obj.default is empty: req[name] = True
|
|
67
|
+
|
|
68
|
+
if hasattr(obj.anno, '__origin__'):
|
|
69
|
+
p.update(_handle_container(obj.anno.__origin__, obj.anno.__args__, defs))
|
|
70
|
+
else:
|
|
71
|
+
p.update(_handle_type(obj.anno, defs))
|
|
72
|
+
|
|
73
|
+
# %% ../01_funccall.ipynb 22
|
|
74
|
+
def _get_nested_schema(obj):
|
|
75
|
+
"Generate nested JSON schema for a class or function"
|
|
76
|
+
d = docments(obj, full=True)
|
|
77
|
+
props, req, defs = {}, {}, {}
|
|
78
|
+
|
|
79
|
+
for n, o in d.items():
|
|
80
|
+
if n != 'return' and n != 'self':
|
|
81
|
+
_process_property(n, o, props, req, defs)
|
|
82
|
+
|
|
83
|
+
schema = dict(type='object', properties=props, title=obj.__name__ if isinstance(obj, type) else None)
|
|
84
|
+
if req: schema['required'] = list(req)
|
|
85
|
+
if defs: schema['$defs'] = defs
|
|
86
|
+
return schema
|
|
87
|
+
|
|
88
|
+
# %% ../01_funccall.ipynb 26
|
|
33
89
|
def get_schema(f:callable, pname='input_schema')->dict:
|
|
34
|
-
"
|
|
35
|
-
|
|
36
|
-
ret = d.pop('return')
|
|
37
|
-
d.pop('self', None) # Ignore `self` for methods
|
|
38
|
-
paramd = {
|
|
39
|
-
'type': "object",
|
|
40
|
-
'properties': {n:_param(n,o) for n,o in d.items() if n[0]!='_'},
|
|
41
|
-
'required': [n for n,o in d.items() if o.default is empty and n[0]!='_']
|
|
42
|
-
}
|
|
90
|
+
"Generate JSON schema for a class, function, or method"
|
|
91
|
+
schema = _get_nested_schema(f)
|
|
43
92
|
desc = f.__doc__
|
|
44
93
|
assert desc, "Docstring missing!"
|
|
94
|
+
d = docments(f, full=True)
|
|
95
|
+
ret = d.pop('return')
|
|
45
96
|
if ret.anno is not empty: desc += f'\n\nReturns:\n- type: {_types(ret.anno)[0]}'
|
|
46
|
-
|
|
47
|
-
return {'name':f.__name__, 'description':desc, pname:paramd}
|
|
97
|
+
return {"name": f.__name__, "description": desc, pname: schema}
|
|
48
98
|
|
|
49
|
-
# %% ../01_funccall.ipynb
|
|
99
|
+
# %% ../01_funccall.ipynb 39
|
|
50
100
|
import ast, time, signal, traceback
|
|
51
101
|
from fastcore.utils import *
|
|
52
102
|
|
|
53
|
-
# %% ../01_funccall.ipynb
|
|
103
|
+
# %% ../01_funccall.ipynb 40
|
|
54
104
|
def _copy_loc(new, orig):
|
|
55
105
|
"Copy location information from original node to new node and all children."
|
|
56
106
|
new = ast.copy_location(new, orig)
|
|
@@ -59,7 +109,7 @@ def _copy_loc(new, orig):
|
|
|
59
109
|
elif isinstance(o, list): setattr(new, field, [_copy_loc(value, orig) for value in o])
|
|
60
110
|
return new
|
|
61
111
|
|
|
62
|
-
# %% ../01_funccall.ipynb
|
|
112
|
+
# %% ../01_funccall.ipynb 42
|
|
63
113
|
def _run(code:str ):
|
|
64
114
|
"Run `code`, returning final expression (similar to IPython)"
|
|
65
115
|
tree = ast.parse(code)
|
|
@@ -82,7 +132,7 @@ def _run(code:str ):
|
|
|
82
132
|
if _result is not None: return _result
|
|
83
133
|
return stdout_buffer.getvalue().strip()
|
|
84
134
|
|
|
85
|
-
# %% ../01_funccall.ipynb
|
|
135
|
+
# %% ../01_funccall.ipynb 47
|
|
86
136
|
def python(code, # Code to execute
|
|
87
137
|
timeout=5 # Maximum run time in seconds before a `TimeoutError` is raised
|
|
88
138
|
): # Result of last node, if it's an expression, or `None` otherwise
|
|
@@ -94,3 +144,19 @@ def python(code, # Code to execute
|
|
|
94
144
|
try: return _run(code)
|
|
95
145
|
except Exception as e: return traceback.format_exc()
|
|
96
146
|
finally: signal.alarm(0)
|
|
147
|
+
|
|
148
|
+
# %% ../01_funccall.ipynb 54
|
|
149
|
+
def mk_ns(*funcs_or_objs):
|
|
150
|
+
merged = {}
|
|
151
|
+
for o in funcs_or_objs:
|
|
152
|
+
if isinstance(o, type): merged |= {n:getattr(o,n) for n,m in o.__dict__.items() if isinstance(m, (staticmethod, classmethod))}
|
|
153
|
+
if isinstance(o, object): merged |= {n:getattr(o,n) for n, m in inspect.getmembers(o, inspect.ismethod)} | {n:m for n,m in o.__class__.__dict__.items() if isinstance(m, staticmethod)}
|
|
154
|
+
if callable(o) and hasattr(o, '__name__'): merged |= {o.__name__: o}
|
|
155
|
+
return merged
|
|
156
|
+
|
|
157
|
+
# %% ../01_funccall.ipynb 63
|
|
158
|
+
def call_func(fc_name, fc_inputs, ns):
|
|
159
|
+
"Call the function `fc_name` with the given `fc_inputs` using namespace `ns`."
|
|
160
|
+
if not isinstance(ns, abc.Mapping): ns = mk_ns(*ns)
|
|
161
|
+
func = ns[fc_name]
|
|
162
|
+
return func(**fc_inputs)
|
toolslm/md_hier.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from fastcore.utils import *
|
|
3
|
+
__all__ = ['markdown_to_dict', 'create_heading_dict']
|
|
4
|
+
|
|
5
|
+
def markdown_to_dict(markdown_content):
|
|
6
|
+
def clean_heading(text): return re.sub(r'[^A-Za-z0-9 ]+', '', text).strip()
|
|
7
|
+
|
|
8
|
+
lines = markdown_content.splitlines()
|
|
9
|
+
headings = []
|
|
10
|
+
|
|
11
|
+
# Parse headings with their levels and line numbers
|
|
12
|
+
for idx, line in enumerate(lines):
|
|
13
|
+
match = re.match(r'^(#{1,6})\s*(.*)', line)
|
|
14
|
+
if match:
|
|
15
|
+
level = len(match.group(1))
|
|
16
|
+
text = match.group(2).strip()
|
|
17
|
+
headings.append({'level': level, 'text': text, 'line': idx})
|
|
18
|
+
|
|
19
|
+
# Assign content to each heading, including subheadings
|
|
20
|
+
for i, h in enumerate(headings):
|
|
21
|
+
start = h['line'] # Include the heading line itself
|
|
22
|
+
# Find the end index: next heading of same or higher level
|
|
23
|
+
for j in range(i + 1, len(headings)):
|
|
24
|
+
if headings[j]['level'] <= h['level']:
|
|
25
|
+
end = headings[j]['line']
|
|
26
|
+
break
|
|
27
|
+
else: end = len(lines)
|
|
28
|
+
h['content'] = '\n'.join(lines[start:end]).strip()
|
|
29
|
+
|
|
30
|
+
# Build the dictionary with hierarchical keys
|
|
31
|
+
result,stack = {},[]
|
|
32
|
+
for h in headings:
|
|
33
|
+
stack = stack[:h['level'] - 1] + [clean_heading(h['text'])]
|
|
34
|
+
key = '.'.join(stack)
|
|
35
|
+
result[key] = h['content']
|
|
36
|
+
return dict2obj(result)
|
|
37
|
+
|
|
38
|
+
def create_heading_dict(text):
|
|
39
|
+
headings = re.findall(r'^#+.*', text, flags=re.MULTILINE)
|
|
40
|
+
result = {}
|
|
41
|
+
stack = [result]
|
|
42
|
+
prev_level = 0
|
|
43
|
+
|
|
44
|
+
for heading in headings:
|
|
45
|
+
level = heading.count('#')
|
|
46
|
+
title = heading.strip('#').strip()
|
|
47
|
+
while level <= prev_level:
|
|
48
|
+
stack.pop()
|
|
49
|
+
prev_level -= 1
|
|
50
|
+
new_dict = {}
|
|
51
|
+
stack[-1][title] = new_dict
|
|
52
|
+
stack.append(new_dict)
|
|
53
|
+
prev_level = level
|
|
54
|
+
return dict2obj(result)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
if __name__=='__main__':
|
|
58
|
+
md_content = """
|
|
59
|
+
# User
|
|
60
|
+
|
|
61
|
+
This is the User section.
|
|
62
|
+
|
|
63
|
+
## Tokens
|
|
64
|
+
|
|
65
|
+
Details about tokens.
|
|
66
|
+
|
|
67
|
+
### Value
|
|
68
|
+
|
|
69
|
+
The value of tokens.
|
|
70
|
+
|
|
71
|
+
Some more details.
|
|
72
|
+
|
|
73
|
+
## Settings
|
|
74
|
+
|
|
75
|
+
User settings information.
|
|
76
|
+
|
|
77
|
+
# Admin
|
|
78
|
+
|
|
79
|
+
Admin section.
|
|
80
|
+
|
|
81
|
+
## Users
|
|
82
|
+
|
|
83
|
+
Admin users management.
|
|
84
|
+
"""
|
|
85
|
+
|
|
86
|
+
result = markdown_to_dict(md_content)
|
|
87
|
+
#for key, value in result.items(): print(f'Key: {key}\nValue:\n{value}\n{"-"*40}')
|
|
88
|
+
|
|
89
|
+
def test_empty_content():
|
|
90
|
+
md_content = "# Empty Heading"
|
|
91
|
+
result = markdown_to_dict(md_content)
|
|
92
|
+
assert result['Empty Heading'] == '# Empty Heading'
|
|
93
|
+
|
|
94
|
+
def test_special_characters():
|
|
95
|
+
md_content = "# Heading *With* Special _Characters_!\nContent under heading."
|
|
96
|
+
result = markdown_to_dict(md_content)
|
|
97
|
+
assert 'Heading With Special Characters' in result
|
|
98
|
+
assert result['Heading With Special Characters'] == '# Heading *With* Special _Characters_!\nContent under heading.'
|
|
99
|
+
|
|
100
|
+
def test_duplicate_headings():
|
|
101
|
+
md_content = "# Duplicate\n## Duplicate\n### Duplicate\nContent under duplicate headings."
|
|
102
|
+
result = markdown_to_dict(md_content)
|
|
103
|
+
assert 'Duplicate' in result
|
|
104
|
+
assert 'Duplicate.Duplicate' in result
|
|
105
|
+
assert 'Duplicate.Duplicate.Duplicate' in result
|
|
106
|
+
assert result['Duplicate.Duplicate.Duplicate'] == '### Duplicate\nContent under duplicate headings.'
|
|
107
|
+
|
|
108
|
+
def test_no_content():
|
|
109
|
+
md_content = "# No Content Heading\n## Subheading"
|
|
110
|
+
result = markdown_to_dict(md_content)
|
|
111
|
+
assert result['No Content Heading'] == '# No Content Heading\n## Subheading'
|
|
112
|
+
assert result['No Content Heading.Subheading'] == '## Subheading'
|
|
113
|
+
|
|
114
|
+
def test_different_levels():
|
|
115
|
+
md_content = "### Level 3 Heading\nContent at level 3.\n# Level 1 Heading\nContent at level 1."
|
|
116
|
+
result = markdown_to_dict(md_content)
|
|
117
|
+
assert 'Level 3 Heading' in result
|
|
118
|
+
assert 'Level 1 Heading' in result
|
|
119
|
+
assert result['Level 3 Heading'] == '### Level 3 Heading\nContent at level 3.'
|
|
120
|
+
assert result['Level 1 Heading'] == '# Level 1 Heading\nContent at level 1.'
|
|
121
|
+
|
|
122
|
+
def test_parent_includes_subheadings():
|
|
123
|
+
md_content = "# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content."
|
|
124
|
+
result = markdown_to_dict(md_content)
|
|
125
|
+
assert result['Parent'] == '# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content.'
|
|
126
|
+
assert result['Parent.Child'] == '## Child\nChild content.\n### Grandchild\nGrandchild content.'
|
|
127
|
+
assert result['Parent.Child.Grandchild'] == '### Grandchild\nGrandchild content.'
|
|
128
|
+
|
|
129
|
+
test_empty_content()
|
|
130
|
+
test_special_characters()
|
|
131
|
+
test_duplicate_headings()
|
|
132
|
+
test_no_content()
|
|
133
|
+
test_different_levels()
|
|
134
|
+
test_parent_includes_subheadings()
|
|
135
|
+
print('tests passed')
|
|
136
|
+
|
toolslm/xml.py
CHANGED
|
@@ -10,7 +10,7 @@ from collections import namedtuple
|
|
|
10
10
|
from fastcore.utils import *
|
|
11
11
|
from fastcore.meta import delegates
|
|
12
12
|
from fastcore.xtras import hl_md
|
|
13
|
-
from fastcore.xml import to_xml, Document, Documents, Document_content,
|
|
13
|
+
from fastcore.xml import to_xml, Document, Documents, Document_content, Src
|
|
14
14
|
from fastcore.script import call_parse
|
|
15
15
|
try: from IPython import display
|
|
16
16
|
except: display=None
|
|
@@ -32,7 +32,7 @@ def json_to_xml(d:dict, # JSON dictionary to convert
|
|
|
32
32
|
return ET.tostring(root, encoding='unicode')
|
|
33
33
|
|
|
34
34
|
# %% ../00_xml.ipynb 9
|
|
35
|
-
doctype = namedtuple('doctype', ['
|
|
35
|
+
doctype = namedtuple('doctype', ['src', 'content'])
|
|
36
36
|
|
|
37
37
|
# %% ../00_xml.ipynb 11
|
|
38
38
|
def _add_nls(s):
|
|
@@ -42,40 +42,40 @@ def _add_nls(s):
|
|
|
42
42
|
if s[-1]!='\n': s = s+'\n'
|
|
43
43
|
return s
|
|
44
44
|
|
|
45
|
-
# %% ../00_xml.ipynb
|
|
45
|
+
# %% ../00_xml.ipynb 16
|
|
46
46
|
def mk_doctype(content:str, # The document content
|
|
47
|
-
|
|
47
|
+
src:Optional[str]=None # URL, filename, etc; defaults to `md5(content)` if not provided
|
|
48
48
|
) -> namedtuple:
|
|
49
49
|
"Create a `doctype` named tuple"
|
|
50
|
-
if
|
|
51
|
-
return doctype(_add_nls(str(
|
|
50
|
+
if src is None: src = hashlib.md5(content.encode()).hexdigest()[:8]
|
|
51
|
+
return doctype(_add_nls(str(src).strip()), _add_nls(content.strip()))
|
|
52
52
|
|
|
53
|
-
# %% ../00_xml.ipynb
|
|
53
|
+
# %% ../00_xml.ipynb 19
|
|
54
54
|
def mk_doc(index:int, # The document index
|
|
55
55
|
content:str, # The document content
|
|
56
|
-
|
|
56
|
+
src:Optional[str]=None, # URL, filename, etc; defaults to `md5(content)` if not provided
|
|
57
57
|
**kwargs
|
|
58
58
|
) -> tuple:
|
|
59
59
|
"Create an `ft` format tuple for a single doc in Anthropic's recommended format"
|
|
60
|
-
dt = mk_doctype(content,
|
|
61
|
-
content = Document_content(dt.content)
|
|
62
|
-
|
|
63
|
-
return Document(
|
|
60
|
+
dt = mk_doctype(content, src)
|
|
61
|
+
content = Document_content(NotStr(dt.content))
|
|
62
|
+
src = Src(NotStr(dt.src))
|
|
63
|
+
return Document(src, content, index=index, **kwargs)
|
|
64
64
|
|
|
65
|
-
# %% ../00_xml.ipynb
|
|
65
|
+
# %% ../00_xml.ipynb 22
|
|
66
66
|
def docs_xml(docs:list[str], # The content of each document
|
|
67
|
-
|
|
67
|
+
srcs:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided
|
|
68
68
|
prefix:bool=True, # Include Anthropic's suggested prose intro?
|
|
69
69
|
details:Optional[list]=None # Optional list of dicts with additional attrs for each doc
|
|
70
70
|
)->str:
|
|
71
71
|
"Create an XML string containing `docs` in Anthropic's recommended format"
|
|
72
72
|
pre = 'Here are some documents for you to reference for your task:\n\n' if prefix else ''
|
|
73
|
-
if
|
|
73
|
+
if srcs is None: srcs = [None]*len(docs)
|
|
74
74
|
if details is None: details = [{}]*len(docs)
|
|
75
|
-
docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,
|
|
75
|
+
docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)))
|
|
76
76
|
return pre + to_xml(Documents(docs))
|
|
77
77
|
|
|
78
|
-
# %% ../00_xml.ipynb
|
|
78
|
+
# %% ../00_xml.ipynb 29
|
|
79
79
|
def files2ctx(
|
|
80
80
|
fnames:list[Union[str,Path]], # List of file names to add to context
|
|
81
81
|
prefix:bool=True # Include Anthropic's suggested prose intro?
|
|
@@ -84,7 +84,7 @@ def files2ctx(
|
|
|
84
84
|
contents = [o.read_text() for o in fnames]
|
|
85
85
|
return docs_xml(contents, fnames, prefix=prefix)
|
|
86
86
|
|
|
87
|
-
# %% ../00_xml.ipynb
|
|
87
|
+
# %% ../00_xml.ipynb 32
|
|
88
88
|
@delegates(globtastic)
|
|
89
89
|
def folder2ctx(
|
|
90
90
|
folder:Union[str,Path], # Folder name containing files to add to context
|
|
@@ -94,11 +94,11 @@ def folder2ctx(
|
|
|
94
94
|
fnames = globtastic(folder, **kwargs)
|
|
95
95
|
return files2ctx(fnames, prefix=prefix)
|
|
96
96
|
|
|
97
|
-
# %% ../00_xml.ipynb
|
|
97
|
+
# %% ../00_xml.ipynb 34
|
|
98
98
|
@call_parse
|
|
99
99
|
@delegates(folder2ctx)
|
|
100
100
|
def folder2ctx_cli(
|
|
101
101
|
folder:str, # Folder name containing files to add to context
|
|
102
102
|
**kwargs # Passed to `folder2ctx`
|
|
103
103
|
)->str: # XML for Claude context
|
|
104
|
-
|
|
104
|
+
print(folder2ctx(folder, **kwargs))
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: toolslm
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Tools to make language models a bit easier to use
|
|
5
|
+
Home-page: https://github.com/AnswerDotAI/toolslm
|
|
6
|
+
Author: Jeremy Howard
|
|
7
|
+
Author-email: j@fast.ai
|
|
8
|
+
License: Apache Software License 2.0
|
|
9
|
+
Keywords: nbdev jupyter notebook python
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Natural Language :: English
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
16
|
+
Requires-Python: >=3.9
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: fastcore>=1.5.47
|
|
20
|
+
Requires-Dist: beautifulsoup4
|
|
21
|
+
Requires-Dist: html2text
|
|
22
|
+
Requires-Dist: httpx
|
|
23
|
+
Requires-Dist: llms-txt
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
|
|
26
|
+
# toolslm
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
30
|
+
|
|
31
|
+
This is a work in progress…
|
|
32
|
+
|
|
33
|
+
## Install
|
|
34
|
+
|
|
35
|
+
``` sh
|
|
36
|
+
pip install toolslm
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## How to use
|
|
40
|
+
|
|
41
|
+
### Context creation
|
|
42
|
+
|
|
43
|
+
toolslm has some helpers to make it easier to generate XML context from
|
|
44
|
+
files, for instance
|
|
45
|
+
[`folder2ctx`](https://AnswerDotAI.github.io/toolslm/xml.html#folder2ctx):
|
|
46
|
+
|
|
47
|
+
``` python
|
|
48
|
+
print(folder2ctx('samples', prefix=False, file_glob='*.py'))
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
<documents><document index="1"><src>
|
|
52
|
+
samples/sample_core.py
|
|
53
|
+
</src><document-content>
|
|
54
|
+
import inspect
|
|
55
|
+
empty = inspect.Parameter.empty
|
|
56
|
+
models = 'claude-3-opus-20240229','claude-3-sonnet-20240229','claude-3-haiku-20240307'
|
|
57
|
+
</document-content></document></documents>
|
|
58
|
+
|
|
59
|
+
JSON doesn’t map as nicely to XML as the `ft` data structure from
|
|
60
|
+
`fastcore.xml`, but for simple XML trees it can be convenient. The
|
|
61
|
+
[`json_to_xml`](https://AnswerDotAI.github.io/toolslm/xml.html#json_to_xml)
|
|
62
|
+
function handles that conversion:
|
|
63
|
+
|
|
64
|
+
``` python
|
|
65
|
+
a = dict(surname='Howard', firstnames=['Jeremy','Peter'],
|
|
66
|
+
address=dict(state='Queensland',country='Australia'))
|
|
67
|
+
print(json_to_xml(a, 'person'))
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
<person>
|
|
71
|
+
<surname>Howard</surname>
|
|
72
|
+
<firstnames>
|
|
73
|
+
<item>Jeremy</item>
|
|
74
|
+
<item>Peter</item>
|
|
75
|
+
</firstnames>
|
|
76
|
+
<address>
|
|
77
|
+
<state>Queensland</state>
|
|
78
|
+
<country>Australia</country>
|
|
79
|
+
</address>
|
|
80
|
+
</person>
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
toolslm/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
|
|
2
|
+
toolslm/_modidx.py,sha256=EIl2FBWhcZUS46r1AU0wURYg2O6Z3aXTPUr3p8Smrqk,3882
|
|
3
|
+
toolslm/download.py,sha256=tXhq77GCqwVFDzTtzjcSAjxUWRiyPsjlXzkMjleH3dQ,4378
|
|
4
|
+
toolslm/funccall.py,sha256=hSvBvfMv-YcBSUUs4-NrYu1f8jg4gfu2s82cPyIHVkU,6534
|
|
5
|
+
toolslm/md_hier.py,sha256=hkCjuOfIFWuMEiM2_XCoD9QIBjy9huLOSvpX_bMdn0Y,4645
|
|
6
|
+
toolslm/shell.py,sha256=GVqfL74NHw66zzZ7jvGVLjE55ZNJGBPvEb8kLz4aoYc,1576
|
|
7
|
+
toolslm/xml.py,sha256=QNwUavoMkFK84D7dMwnBjqlYJwN-pJ7u3BxOeDuNAmk,4088
|
|
8
|
+
toolslm-0.1.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
9
|
+
toolslm-0.1.0.dist-info/METADATA,sha256=tQBydygSCJdH_wQaIbzC8Z8rZanQTJOdmpe1nEETkdE,2205
|
|
10
|
+
toolslm-0.1.0.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
|
11
|
+
toolslm-0.1.0.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
|
|
12
|
+
toolslm-0.1.0.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
|
|
13
|
+
toolslm-0.1.0.dist-info/RECORD,,
|
toolslm-0.0.6.dist-info/METADATA
DELETED
|
@@ -1,154 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: toolslm
|
|
3
|
-
Version: 0.0.6
|
|
4
|
-
Summary: Tools to make language models a bit easier to use
|
|
5
|
-
Home-page: https://github.com/AnswerDotAI/toolslm
|
|
6
|
-
Author: Jeremy Howard
|
|
7
|
-
Author-email: j@fast.ai
|
|
8
|
-
License: Apache Software License 2.0
|
|
9
|
-
Keywords: nbdev jupyter notebook python
|
|
10
|
-
Classifier: Development Status :: 4 - Beta
|
|
11
|
-
Classifier: Intended Audience :: Developers
|
|
12
|
-
Classifier: Natural Language :: English
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
16
|
-
Requires-Python: >=3.9
|
|
17
|
-
Description-Content-Type: text/markdown
|
|
18
|
-
License-File: LICENSE
|
|
19
|
-
Requires-Dist: fastcore >=1.5.47
|
|
20
|
-
Requires-Dist: beautifulsoup4
|
|
21
|
-
Requires-Dist: html2text
|
|
22
|
-
Requires-Dist: httpx
|
|
23
|
-
Requires-Dist: llms-txt
|
|
24
|
-
Provides-Extra: dev
|
|
25
|
-
|
|
26
|
-
# toolslm
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
30
|
-
|
|
31
|
-
This is a work in progress…
|
|
32
|
-
|
|
33
|
-
## Install
|
|
34
|
-
|
|
35
|
-
``` sh
|
|
36
|
-
pip install toolslm
|
|
37
|
-
```
|
|
38
|
-
|
|
39
|
-
## How to use
|
|
40
|
-
|
|
41
|
-
### Context creation
|
|
42
|
-
|
|
43
|
-
toolslm has some helpers to make it easier to generate XML context from
|
|
44
|
-
files, for instance `folder2ctx`:
|
|
45
|
-
|
|
46
|
-
``` python
|
|
47
|
-
print(folder2ctx('samples', prefix=False, file_glob='*.py'))
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
<documents>
|
|
51
|
-
<document index="1">
|
|
52
|
-
<source>
|
|
53
|
-
samples/sample_core.py
|
|
54
|
-
</source>
|
|
55
|
-
<document_content>
|
|
56
|
-
import inspect
|
|
57
|
-
empty = inspect.Parameter.empty
|
|
58
|
-
models = 'claude-3-opus-20240229','claude-3-sonnet-20240229','claude-3-haiku-20240307'
|
|
59
|
-
</document_content>
|
|
60
|
-
</document>
|
|
61
|
-
</documents>
|
|
62
|
-
|
|
63
|
-
### XML helpers
|
|
64
|
-
|
|
65
|
-
Many language models work well with XML inputs, but XML can be a bit
|
|
66
|
-
clunky to work with manually. Therefore, toolslm includes a couple of
|
|
67
|
-
more streamlined approaches for XML generation.
|
|
68
|
-
|
|
69
|
-
An XML node contains a tag, optional children, and optional attributes.
|
|
70
|
-
`xt` creates a tuple of these three things, which we will use to general
|
|
71
|
-
XML shortly. Attributes are passed as kwargs; since these might conflict
|
|
72
|
-
with reserved words in Python, you can optionally add a `_` prefix and
|
|
73
|
-
it’ll be stripped off.
|
|
74
|
-
|
|
75
|
-
``` python
|
|
76
|
-
xt('x-custom', ['hi'], _class='bar')
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
('x-custom', ['hi'], {'class': 'bar'})
|
|
80
|
-
|
|
81
|
-
Claudette has functions defined for some common HTML elements to create
|
|
82
|
-
`xt` tuples more easily, including these:
|
|
83
|
-
|
|
84
|
-
``` python
|
|
85
|
-
from toolslm.xml import div,img,h1,h2,p,hr,html
|
|
86
|
-
```
|
|
87
|
-
|
|
88
|
-
``` python
|
|
89
|
-
a = html([
|
|
90
|
-
p('This is a paragraph'),
|
|
91
|
-
hr(),
|
|
92
|
-
img(src='http://example.prg'),
|
|
93
|
-
div([
|
|
94
|
-
h1('This is a header'),
|
|
95
|
-
h2('This is a sub-header', style='k:v'),
|
|
96
|
-
], _class='foo')
|
|
97
|
-
])
|
|
98
|
-
a
|
|
99
|
-
```
|
|
100
|
-
|
|
101
|
-
('html',
|
|
102
|
-
[('p', 'This is a paragraph', {}),
|
|
103
|
-
('hr', None, {}),
|
|
104
|
-
('img', None, {'src': 'http://example.prg'}),
|
|
105
|
-
('div',
|
|
106
|
-
[('h1', 'This is a header', {}),
|
|
107
|
-
('h2', 'This is a sub-header', {'style': 'k:v'})],
|
|
108
|
-
{'class': 'foo'})],
|
|
109
|
-
{})
|
|
110
|
-
|
|
111
|
-
To convert a tuple data structure created with `xt` and friends into
|
|
112
|
-
XML, use `to_xml`, adding the `hl` parameter to optionally add syntax
|
|
113
|
-
highlighting:
|
|
114
|
-
|
|
115
|
-
``` python
|
|
116
|
-
to_xml(a, hl=True)
|
|
117
|
-
```
|
|
118
|
-
|
|
119
|
-
``` xml
|
|
120
|
-
<html>
|
|
121
|
-
<p>This is a paragraph</p>
|
|
122
|
-
<hr />
|
|
123
|
-
<img src="http://example.prg" />
|
|
124
|
-
<div class="foo">
|
|
125
|
-
<h1>This is a header</h1>
|
|
126
|
-
<h2 style="k:v">This is a sub-header</h2>
|
|
127
|
-
</div>
|
|
128
|
-
</html>
|
|
129
|
-
```
|
|
130
|
-
|
|
131
|
-
JSON doesn’t map as nicely to XML as the `xt` data structure, but for
|
|
132
|
-
simple XML trees it can be convenient. The `json_to_xml` function
|
|
133
|
-
handles that conversion:
|
|
134
|
-
|
|
135
|
-
``` python
|
|
136
|
-
a = dict(surname='Howard', firstnames=['Jeremy','Peter'],
|
|
137
|
-
address=dict(state='Queensland',country='Australia'))
|
|
138
|
-
print(json_to_xml(a, 'person'))
|
|
139
|
-
```
|
|
140
|
-
|
|
141
|
-
<person>
|
|
142
|
-
<surname>Howard</surname>
|
|
143
|
-
<firstnames>
|
|
144
|
-
<item>Jeremy</item>
|
|
145
|
-
<item>Peter</item>
|
|
146
|
-
</firstnames>
|
|
147
|
-
<address>
|
|
148
|
-
<state>Queensland</state>
|
|
149
|
-
<country>Australia</country>
|
|
150
|
-
</address>
|
|
151
|
-
</person>
|
|
152
|
-
|
|
153
|
-
See the `xml source` section for a walkthru of XML and document context
|
|
154
|
-
generation functionality.
|
toolslm-0.0.6.dist-info/RECORD
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
toolslm/__init__.py,sha256=QiiYsv0kcJaB8wCWyT-FnI2b6be87HA-CrrIUn8LQhg,22
|
|
2
|
-
toolslm/_modidx.py,sha256=FiHwMAAjvPdu7kN0pA1OJTJbUg0ddo0o12_C9JUFPDc,3103
|
|
3
|
-
toolslm/download.py,sha256=BIhmbDSxM__57tukac63iwPx5sXIfbjYp7gh_fhp4Gw,3621
|
|
4
|
-
toolslm/funccall.py,sha256=ZXfzhP0N5cex7n8QHuxDfUb0BJX1iI1inFb064LAGlc,3914
|
|
5
|
-
toolslm/shell.py,sha256=GVqfL74NHw66zzZ7jvGVLjE55ZNJGBPvEb8kLz4aoYc,1576
|
|
6
|
-
toolslm/xml.py,sha256=Alcd96KfNO8LklVefyc51LbXBoVLRSgifrpMVZPqYsc,4120
|
|
7
|
-
toolslm-0.0.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
8
|
-
toolslm-0.0.6.dist-info/METADATA,sha256=_J7uXsh_qRX_wVKxssQjFpRH7zZjAwC0av1J5UIfTdk,3882
|
|
9
|
-
toolslm-0.0.6.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
10
|
-
toolslm-0.0.6.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
|
|
11
|
-
toolslm-0.0.6.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
|
|
12
|
-
toolslm-0.0.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|