toolslm 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toolslm/__init__.py +1 -1
- toolslm/_modidx.py +17 -1
- toolslm/download.py +109 -0
- toolslm/funccall.py +87 -20
- toolslm/md_hier.py +136 -0
- toolslm/xml.py +12 -20
- {toolslm-0.0.5.dist-info → toolslm-0.0.7.dist-info}/METADATA +5 -1
- toolslm-0.0.7.dist-info/RECORD +13 -0
- toolslm-0.0.5.dist-info/RECORD +0 -11
- {toolslm-0.0.5.dist-info → toolslm-0.0.7.dist-info}/LICENSE +0 -0
- {toolslm-0.0.5.dist-info → toolslm-0.0.7.dist-info}/WHEEL +0 -0
- {toolslm-0.0.5.dist-info → toolslm-0.0.7.dist-info}/entry_points.txt +0 -0
- {toolslm-0.0.5.dist-info → toolslm-0.0.7.dist-info}/top_level.txt +0 -0
toolslm/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.0.5"
|
|
1
|
+
__version__ = "0.0.7"
|
toolslm/_modidx.py
CHANGED
|
@@ -5,12 +5,28 @@ d = { 'settings': { 'branch': 'main',
|
|
|
5
5
|
'doc_host': 'https://AnswerDotAI.github.io',
|
|
6
6
|
'git_url': 'https://github.com/AnswerDotAI/toolslm',
|
|
7
7
|
'lib_path': 'toolslm'},
|
|
8
|
-
'syms': { 'toolslm.funccall': { 'toolslm.funccall._copy_loc': ('funccall.html#_copy_loc', 'toolslm/funccall.py'),
|
|
8
|
+
'syms': { 'toolslm.download': { 'toolslm.download._tryget': ('download.html#_tryget', 'toolslm/download.py'),
|
|
9
|
+
'toolslm.download.clean_md': ('download.html#clean_md', 'toolslm/download.py'),
|
|
10
|
+
'toolslm.download.find_docs': ('download.html#find_docs', 'toolslm/download.py'),
|
|
11
|
+
'toolslm.download.get_llmstxt': ('download.html#get_llmstxt', 'toolslm/download.py'),
|
|
12
|
+
'toolslm.download.html2md': ('download.html#html2md', 'toolslm/download.py'),
|
|
13
|
+
'toolslm.download.read_docs': ('download.html#read_docs', 'toolslm/download.py'),
|
|
14
|
+
'toolslm.download.read_html': ('download.html#read_html', 'toolslm/download.py'),
|
|
15
|
+
'toolslm.download.read_md': ('download.html#read_md', 'toolslm/download.py'),
|
|
16
|
+
'toolslm.download.split_url': ('download.html#split_url', 'toolslm/download.py')},
|
|
17
|
+
'toolslm.funccall': { 'toolslm.funccall._copy_loc': ('funccall.html#_copy_loc', 'toolslm/funccall.py'),
|
|
18
|
+
'toolslm.funccall._get_nested_schema': ('funccall.html#_get_nested_schema', 'toolslm/funccall.py'),
|
|
19
|
+
'toolslm.funccall._handle_container': ('funccall.html#_handle_container', 'toolslm/funccall.py'),
|
|
20
|
+
'toolslm.funccall._handle_type': ('funccall.html#_handle_type', 'toolslm/funccall.py'),
|
|
9
21
|
'toolslm.funccall._param': ('funccall.html#_param', 'toolslm/funccall.py'),
|
|
22
|
+
'toolslm.funccall._process_property': ('funccall.html#_process_property', 'toolslm/funccall.py'),
|
|
10
23
|
'toolslm.funccall._run': ('funccall.html#_run', 'toolslm/funccall.py'),
|
|
11
24
|
'toolslm.funccall._types': ('funccall.html#_types', 'toolslm/funccall.py'),
|
|
25
|
+
'toolslm.funccall.call_func': ('funccall.html#call_func', 'toolslm/funccall.py'),
|
|
12
26
|
'toolslm.funccall.get_schema': ('funccall.html#get_schema', 'toolslm/funccall.py'),
|
|
27
|
+
'toolslm.funccall.mk_ns': ('funccall.html#mk_ns', 'toolslm/funccall.py'),
|
|
13
28
|
'toolslm.funccall.python': ('funccall.html#python', 'toolslm/funccall.py')},
|
|
29
|
+
'toolslm.md_hier': {},
|
|
14
30
|
'toolslm.shell': { 'toolslm.shell.TerminalInteractiveShell.run_cell': ( 'shell.html#terminalinteractiveshell.run_cell',
|
|
15
31
|
'toolslm/shell.py'),
|
|
16
32
|
'toolslm.shell.get_shell': ('shell.html#get_shell', 'toolslm/shell.py')},
|
toolslm/download.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../03_download.ipynb.
|
|
2
|
+
|
|
3
|
+
# %% auto 0
|
|
4
|
+
__all__ = ['clean_md', 'read_md', 'html2md', 'read_html', 'get_llmstxt', 'split_url', 'find_docs', 'read_docs']
|
|
5
|
+
|
|
6
|
+
# %% ../03_download.ipynb 2
|
|
7
|
+
from fastcore.utils import *
|
|
8
|
+
from httpx import get
|
|
9
|
+
from fastcore.meta import delegates
|
|
10
|
+
from llms_txt import *
|
|
11
|
+
|
|
12
|
+
from html2text import HTML2Text
|
|
13
|
+
from bs4 import BeautifulSoup
|
|
14
|
+
from urllib.parse import urlparse, urljoin
|
|
15
|
+
|
|
16
|
+
# %% ../03_download.ipynb 4
|
|
17
|
+
def clean_md(text, rm_comments=True, rm_details=True):
|
|
18
|
+
"Remove comments and `<details>` sections from `text`"
|
|
19
|
+
if rm_comments: text = re.sub(r'\n?<!--.*?-->\n?', '', text, flags=re.DOTALL)
|
|
20
|
+
if rm_details: text = re.sub(r'\n?<details>.*?</details>\n?', '', text, flags=re.DOTALL)
|
|
21
|
+
return text
|
|
22
|
+
|
|
23
|
+
# %% ../03_download.ipynb 5
|
|
24
|
+
@delegates(get)
|
|
25
|
+
def read_md(url, rm_comments=True, rm_details=True, **kwargs):
|
|
26
|
+
"Read text from `url` and clean with `clean_docs`"
|
|
27
|
+
return clean_md(get(url, **kwargs).text, rm_comments=rm_comments, rm_details=rm_details)
|
|
28
|
+
|
|
29
|
+
# %% ../03_download.ipynb 7
|
|
30
|
+
def html2md(s:str):
|
|
31
|
+
"Convert `s` from HTML to markdown"
|
|
32
|
+
o = HTML2Text(bodywidth=5000)
|
|
33
|
+
o.ignore_links = True
|
|
34
|
+
o.mark_code = True
|
|
35
|
+
o.ignore_images = True
|
|
36
|
+
return o.handle(s)
|
|
37
|
+
|
|
38
|
+
# %% ../03_download.ipynb 8
|
|
39
|
+
def read_html(url, # URL to read
|
|
40
|
+
sel=None, # Read only outerHTML of CSS selector `sel`
|
|
41
|
+
rm_comments=True, # Removes HTML comments
|
|
42
|
+
rm_details=True, # Removes `<details>` tags
|
|
43
|
+
multi=False, # Get all matches to `sel` or first one
|
|
44
|
+
wrap_tag=None, #If multi, each selection wrapped with <wrap_tag>content</wrap_tag>
|
|
45
|
+
): # Cleaned markdown
|
|
46
|
+
"Get `url`, optionally selecting CSS selector `sel`, and convert to clean markdown"
|
|
47
|
+
page = get(url).text
|
|
48
|
+
if sel:
|
|
49
|
+
soup = BeautifulSoup(page, 'html.parser')
|
|
50
|
+
if multi:
|
|
51
|
+
page = [str(el) for el in soup.select(sel)]
|
|
52
|
+
if not wrap_tag: page = "\n".join(page)
|
|
53
|
+
else: page = str(soup.select_one(sel))
|
|
54
|
+
mds = map(lambda x: clean_md(html2md(x), rm_comments, rm_details=rm_details), tuplify(page))
|
|
55
|
+
if wrap_tag: return '\n'.join([f"\n<{wrap_tag}>\n{o}</{wrap_tag}>\n" for o in mds])
|
|
56
|
+
else: return'\n'.join(mds)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# %% ../03_download.ipynb 12
|
|
60
|
+
def get_llmstxt(url, optional=False, n_workers=None):
|
|
61
|
+
"Get llms.txt file from and expand it with `llms_txt.create_ctx()`"
|
|
62
|
+
if not url.endswith('llms.txt'): return None
|
|
63
|
+
resp = get(url)
|
|
64
|
+
if resp.status_code!=200: return None
|
|
65
|
+
return create_ctx(resp.text, optional=optional, n_workers=n_workers)
|
|
66
|
+
|
|
67
|
+
# %% ../03_download.ipynb 14
|
|
68
|
+
def split_url(url):
|
|
69
|
+
"Split `url` into base, path, and file name, normalising name to '/' if empty"
|
|
70
|
+
parsed = urlparse(url.strip('/'))
|
|
71
|
+
base = f"{parsed.scheme}://{parsed.netloc}"
|
|
72
|
+
path,spl,fname = parsed.path.rpartition('/')
|
|
73
|
+
fname = spl+fname
|
|
74
|
+
if not path and not fname: path='/'
|
|
75
|
+
return base,path,fname
|
|
76
|
+
|
|
77
|
+
# %% ../03_download.ipynb 16
|
|
78
|
+
def _tryget(url):
|
|
79
|
+
"Return response from `url` if `status_code!=404`, otherwise `None`"
|
|
80
|
+
res = get(url)
|
|
81
|
+
return None if res.status_code==404 else url
|
|
82
|
+
|
|
83
|
+
# %% ../03_download.ipynb 17
|
|
84
|
+
def find_docs(url):
|
|
85
|
+
"If available, return LLM-friendly llms.txt context or markdown file location from `url`"
|
|
86
|
+
base,path,fname = split_url(url)
|
|
87
|
+
url = (base+path+fname).strip('/')
|
|
88
|
+
if fname=='/llms.txt': return url
|
|
89
|
+
if Path(fname).suffix in('.md', '.txt', '.rst'): return _tryget(url)
|
|
90
|
+
if '.' in fname: return _tryget(url+'.md') or find_docs(url[:url.rfind('/')])
|
|
91
|
+
res = _tryget(url+'/llms.txt')
|
|
92
|
+
if res: return res
|
|
93
|
+
res = _tryget(url+'/index.md')
|
|
94
|
+
if res: return res
|
|
95
|
+
res = _tryget(url+'/index.html.md')
|
|
96
|
+
if res: return res
|
|
97
|
+
res = _tryget(url+'/index-commonmark.md')
|
|
98
|
+
if res: return res
|
|
99
|
+
parsed_url = urlparse(url)
|
|
100
|
+
if parsed_url.path == '/' or not parsed_url.path: return None
|
|
101
|
+
return find_docs(urljoin(url, '..'))
|
|
102
|
+
|
|
103
|
+
# %% ../03_download.ipynb 23
|
|
104
|
+
def read_docs(url, optional=False, n_workers=None, rm_comments=True, rm_details=True):
|
|
105
|
+
"If available, return LLM-friendly llms.txt context or markdown file response for `url`"
|
|
106
|
+
url = find_docs(url)
|
|
107
|
+
if url.endswith('/llms.txt'): res = get_llmstxt(url, optional=optional, n_workers=n_workers)
|
|
108
|
+
else: res = get(url).text
|
|
109
|
+
return clean_md(res, rm_comments=rm_comments, rm_details=rm_details)
|
toolslm/funccall.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../01_funccall.ipynb.
|
|
2
2
|
|
|
3
3
|
# %% auto 0
|
|
4
|
-
__all__ = ['empty', 'get_schema', 'python']
|
|
4
|
+
__all__ = ['empty', 'get_schema', 'python', 'mk_ns', 'call_func']
|
|
5
5
|
|
|
6
6
|
# %% ../01_funccall.ipynb 2
|
|
7
7
|
import inspect
|
|
8
|
+
from collections import abc
|
|
8
9
|
from fastcore.utils import *
|
|
9
10
|
from fastcore.docments import docments
|
|
10
11
|
|
|
@@ -16,10 +17,12 @@ def _types(t:type)->tuple[str,Optional[str]]:
|
|
|
16
17
|
"Tuple of json schema type name and (if appropriate) array item name."
|
|
17
18
|
if t is empty: raise TypeError('Missing type')
|
|
18
19
|
tmap = {int:"integer", float:"number", str:"string", bool:"boolean", list:"array", dict:"object"}
|
|
19
|
-
|
|
20
|
-
|
|
20
|
+
tmap.update({k.__name__: v for k, v in tmap.items()})
|
|
21
|
+
if getattr(t, '__origin__', None) in (list,tuple): return "array", tmap.get(t.__args__[0].__name__, "object")
|
|
22
|
+
elif isinstance(t, str): return tmap.get(t, "object"), None
|
|
23
|
+
else: return tmap.get(t.__name__, "object"), None
|
|
21
24
|
|
|
22
|
-
# %% ../01_funccall.ipynb
|
|
25
|
+
# %% ../01_funccall.ipynb 16
|
|
23
26
|
def _param(name, info):
|
|
24
27
|
"json schema parameter given `name` and `info` from docments full dict."
|
|
25
28
|
paramt,itemt = _types(info.anno)
|
|
@@ -28,28 +31,76 @@ def _param(name, info):
|
|
|
28
31
|
if info.default is not empty: pschema["default"] = info.default
|
|
29
32
|
return pschema
|
|
30
33
|
|
|
31
|
-
# %% ../01_funccall.ipynb
|
|
34
|
+
# %% ../01_funccall.ipynb 19
|
|
35
|
+
def _handle_type(t, defs):
|
|
36
|
+
"Handle a single type, creating nested schemas if necessary"
|
|
37
|
+
if isinstance(t, type) and not issubclass(t, (int, float, str, bool)):
|
|
38
|
+
defs[t.__name__] = _get_nested_schema(t)
|
|
39
|
+
return {'$ref': f'#/$defs/{t.__name__}'}
|
|
40
|
+
return {'type': _types(t)[0]}
|
|
41
|
+
|
|
42
|
+
# %% ../01_funccall.ipynb 20
|
|
43
|
+
def _handle_container(origin, args, defs):
|
|
44
|
+
"Handle container types like dict, list, tuple, set"
|
|
45
|
+
if origin is dict:
|
|
46
|
+
value_type = args[1].__args__[0] if hasattr(args[1], '__args__') else args[1]
|
|
47
|
+
return {
|
|
48
|
+
'type': 'object',
|
|
49
|
+
'additionalProperties': (
|
|
50
|
+
{'type': 'array', 'items': _handle_type(value_type, defs)}
|
|
51
|
+
if hasattr(args[1], '__origin__') else _handle_type(args[1], defs)
|
|
52
|
+
)
|
|
53
|
+
}
|
|
54
|
+
elif origin in (list, tuple, set):
|
|
55
|
+
schema = {'type': 'array', 'items': _handle_type(args[0], defs)}
|
|
56
|
+
if origin is set:
|
|
57
|
+
schema['uniqueItems'] = True
|
|
58
|
+
return schema
|
|
59
|
+
return None
|
|
60
|
+
|
|
61
|
+
# %% ../01_funccall.ipynb 21
|
|
62
|
+
def _process_property(name, obj, props, req, defs):
|
|
63
|
+
"Process a single property of the schema"
|
|
64
|
+
p = _param(name, obj)
|
|
65
|
+
props[name] = p
|
|
66
|
+
if obj.default is empty: req[name] = True
|
|
67
|
+
|
|
68
|
+
if hasattr(obj.anno, '__origin__'):
|
|
69
|
+
p.update(_handle_container(obj.anno.__origin__, obj.anno.__args__, defs))
|
|
70
|
+
else:
|
|
71
|
+
p.update(_handle_type(obj.anno, defs))
|
|
72
|
+
|
|
73
|
+
# %% ../01_funccall.ipynb 22
|
|
74
|
+
def _get_nested_schema(obj):
|
|
75
|
+
"Generate nested JSON schema for a class or function"
|
|
76
|
+
d = docments(obj, full=True)
|
|
77
|
+
props, req, defs = {}, {}, {}
|
|
78
|
+
|
|
79
|
+
for n, o in d.items():
|
|
80
|
+
if n != 'return' and n != 'self':
|
|
81
|
+
_process_property(n, o, props, req, defs)
|
|
82
|
+
|
|
83
|
+
schema = dict(type='object', properties=props, title=obj.__name__ if isinstance(obj, type) else None)
|
|
84
|
+
if req: schema['required'] = list(req)
|
|
85
|
+
if defs: schema['$defs'] = defs
|
|
86
|
+
return schema
|
|
87
|
+
|
|
88
|
+
# %% ../01_funccall.ipynb 26
|
|
32
89
|
def get_schema(f:callable, pname='input_schema')->dict:
|
|
33
|
-
"
|
|
34
|
-
|
|
35
|
-
ret = d.pop('return')
|
|
36
|
-
d.pop('self', None) # Ignore `self` for methods
|
|
37
|
-
paramd = {
|
|
38
|
-
'type': "object",
|
|
39
|
-
'properties': {n:_param(n,o) for n,o in d.items() if n[0]!='_'},
|
|
40
|
-
'required': [n for n,o in d.items() if o.default is empty and n[0]!='_']
|
|
41
|
-
}
|
|
90
|
+
"Generate JSON schema for a class, function, or method"
|
|
91
|
+
schema = _get_nested_schema(f)
|
|
42
92
|
desc = f.__doc__
|
|
43
93
|
assert desc, "Docstring missing!"
|
|
94
|
+
d = docments(f, full=True)
|
|
95
|
+
ret = d.pop('return')
|
|
44
96
|
if ret.anno is not empty: desc += f'\n\nReturns:\n- type: {_types(ret.anno)[0]}'
|
|
45
|
-
|
|
46
|
-
return {'name':f.__name__, 'description':desc, pname:paramd}
|
|
97
|
+
return {"name": f.__name__, "description": desc, pname: schema}
|
|
47
98
|
|
|
48
|
-
# %% ../01_funccall.ipynb
|
|
99
|
+
# %% ../01_funccall.ipynb 39
|
|
49
100
|
import ast, time, signal, traceback
|
|
50
101
|
from fastcore.utils import *
|
|
51
102
|
|
|
52
|
-
# %% ../01_funccall.ipynb
|
|
103
|
+
# %% ../01_funccall.ipynb 40
|
|
53
104
|
def _copy_loc(new, orig):
|
|
54
105
|
"Copy location information from original node to new node and all children."
|
|
55
106
|
new = ast.copy_location(new, orig)
|
|
@@ -58,7 +109,7 @@ def _copy_loc(new, orig):
|
|
|
58
109
|
elif isinstance(o, list): setattr(new, field, [_copy_loc(value, orig) for value in o])
|
|
59
110
|
return new
|
|
60
111
|
|
|
61
|
-
# %% ../01_funccall.ipynb
|
|
112
|
+
# %% ../01_funccall.ipynb 42
|
|
62
113
|
def _run(code:str ):
|
|
63
114
|
"Run `code`, returning final expression (similar to IPython)"
|
|
64
115
|
tree = ast.parse(code)
|
|
@@ -81,7 +132,7 @@ def _run(code:str ):
|
|
|
81
132
|
if _result is not None: return _result
|
|
82
133
|
return stdout_buffer.getvalue().strip()
|
|
83
134
|
|
|
84
|
-
# %% ../01_funccall.ipynb
|
|
135
|
+
# %% ../01_funccall.ipynb 47
|
|
85
136
|
def python(code, # Code to execute
|
|
86
137
|
timeout=5 # Maximum run time in seconds before a `TimeoutError` is raised
|
|
87
138
|
): # Result of last node, if it's an expression, or `None` otherwise
|
|
@@ -93,3 +144,19 @@ def python(code, # Code to execute
|
|
|
93
144
|
try: return _run(code)
|
|
94
145
|
except Exception as e: return traceback.format_exc()
|
|
95
146
|
finally: signal.alarm(0)
|
|
147
|
+
|
|
148
|
+
# %% ../01_funccall.ipynb 54
|
|
149
|
+
def mk_ns(*funcs_or_objs):
|
|
150
|
+
merged = {}
|
|
151
|
+
for o in funcs_or_objs:
|
|
152
|
+
if isinstance(o, type): merged |= {n:getattr(o,n) for n,m in o.__dict__.items() if isinstance(m, (staticmethod, classmethod))}
|
|
153
|
+
if isinstance(o, object): merged |= {n:getattr(o,n) for n, m in inspect.getmembers(o, inspect.ismethod)} | {n:m for n,m in o.__class__.__dict__.items() if isinstance(m, staticmethod)}
|
|
154
|
+
if callable(o) and hasattr(o, '__name__'): merged |= {o.__name__: o}
|
|
155
|
+
return merged
|
|
156
|
+
|
|
157
|
+
# %% ../01_funccall.ipynb 63
|
|
158
|
+
def call_func(fc_name, fc_inputs, ns):
|
|
159
|
+
"Call the function `fc_name` with the given `fc_inputs` using namespace `ns`."
|
|
160
|
+
if not isinstance(ns, abc.Mapping): ns = mk_ns(*ns)
|
|
161
|
+
func = ns[fc_name]
|
|
162
|
+
return func(**fc_inputs)
|
toolslm/md_hier.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from fastcore.utils import *
|
|
3
|
+
__all__ = ['markdown_to_dict', 'create_heading_dict']
|
|
4
|
+
|
|
5
|
+
def markdown_to_dict(markdown_content):
|
|
6
|
+
def clean_heading(text): return re.sub(r'[^A-Za-z0-9 ]+', '', text).strip()
|
|
7
|
+
|
|
8
|
+
lines = markdown_content.splitlines()
|
|
9
|
+
headings = []
|
|
10
|
+
|
|
11
|
+
# Parse headings with their levels and line numbers
|
|
12
|
+
for idx, line in enumerate(lines):
|
|
13
|
+
match = re.match(r'^(#{1,6})\s*(.*)', line)
|
|
14
|
+
if match:
|
|
15
|
+
level = len(match.group(1))
|
|
16
|
+
text = match.group(2).strip()
|
|
17
|
+
headings.append({'level': level, 'text': text, 'line': idx})
|
|
18
|
+
|
|
19
|
+
# Assign content to each heading, including subheadings
|
|
20
|
+
for i, h in enumerate(headings):
|
|
21
|
+
start = h['line'] # Include the heading line itself
|
|
22
|
+
# Find the end index: next heading of same or higher level
|
|
23
|
+
for j in range(i + 1, len(headings)):
|
|
24
|
+
if headings[j]['level'] <= h['level']:
|
|
25
|
+
end = headings[j]['line']
|
|
26
|
+
break
|
|
27
|
+
else: end = len(lines)
|
|
28
|
+
h['content'] = '\n'.join(lines[start:end]).strip()
|
|
29
|
+
|
|
30
|
+
# Build the dictionary with hierarchical keys
|
|
31
|
+
result,stack = {},[]
|
|
32
|
+
for h in headings:
|
|
33
|
+
stack = stack[:h['level'] - 1] + [clean_heading(h['text'])]
|
|
34
|
+
key = '.'.join(stack)
|
|
35
|
+
result[key] = h['content']
|
|
36
|
+
return dict2obj(result)
|
|
37
|
+
|
|
38
|
+
def create_heading_dict(text):
|
|
39
|
+
headings = re.findall(r'^#+.*', text, flags=re.MULTILINE)
|
|
40
|
+
result = {}
|
|
41
|
+
stack = [result]
|
|
42
|
+
prev_level = 0
|
|
43
|
+
|
|
44
|
+
for heading in headings:
|
|
45
|
+
level = heading.count('#')
|
|
46
|
+
title = heading.strip('#').strip()
|
|
47
|
+
while level <= prev_level:
|
|
48
|
+
stack.pop()
|
|
49
|
+
prev_level -= 1
|
|
50
|
+
new_dict = {}
|
|
51
|
+
stack[-1][title] = new_dict
|
|
52
|
+
stack.append(new_dict)
|
|
53
|
+
prev_level = level
|
|
54
|
+
return dict2obj(result)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
if __name__=='__main__':
|
|
58
|
+
md_content = """
|
|
59
|
+
# User
|
|
60
|
+
|
|
61
|
+
This is the User section.
|
|
62
|
+
|
|
63
|
+
## Tokens
|
|
64
|
+
|
|
65
|
+
Details about tokens.
|
|
66
|
+
|
|
67
|
+
### Value
|
|
68
|
+
|
|
69
|
+
The value of tokens.
|
|
70
|
+
|
|
71
|
+
Some more details.
|
|
72
|
+
|
|
73
|
+
## Settings
|
|
74
|
+
|
|
75
|
+
User settings information.
|
|
76
|
+
|
|
77
|
+
# Admin
|
|
78
|
+
|
|
79
|
+
Admin section.
|
|
80
|
+
|
|
81
|
+
## Users
|
|
82
|
+
|
|
83
|
+
Admin users management.
|
|
84
|
+
"""
|
|
85
|
+
|
|
86
|
+
result = markdown_to_dict(md_content)
|
|
87
|
+
#for key, value in result.items(): print(f'Key: {key}\nValue:\n{value}\n{"-"*40}')
|
|
88
|
+
|
|
89
|
+
def test_empty_content():
|
|
90
|
+
md_content = "# Empty Heading"
|
|
91
|
+
result = markdown_to_dict(md_content)
|
|
92
|
+
assert result['Empty Heading'] == '# Empty Heading'
|
|
93
|
+
|
|
94
|
+
def test_special_characters():
|
|
95
|
+
md_content = "# Heading *With* Special _Characters_!\nContent under heading."
|
|
96
|
+
result = markdown_to_dict(md_content)
|
|
97
|
+
assert 'Heading With Special Characters' in result
|
|
98
|
+
assert result['Heading With Special Characters'] == '# Heading *With* Special _Characters_!\nContent under heading.'
|
|
99
|
+
|
|
100
|
+
def test_duplicate_headings():
|
|
101
|
+
md_content = "# Duplicate\n## Duplicate\n### Duplicate\nContent under duplicate headings."
|
|
102
|
+
result = markdown_to_dict(md_content)
|
|
103
|
+
assert 'Duplicate' in result
|
|
104
|
+
assert 'Duplicate.Duplicate' in result
|
|
105
|
+
assert 'Duplicate.Duplicate.Duplicate' in result
|
|
106
|
+
assert result['Duplicate.Duplicate.Duplicate'] == '### Duplicate\nContent under duplicate headings.'
|
|
107
|
+
|
|
108
|
+
def test_no_content():
|
|
109
|
+
md_content = "# No Content Heading\n## Subheading"
|
|
110
|
+
result = markdown_to_dict(md_content)
|
|
111
|
+
assert result['No Content Heading'] == '# No Content Heading\n## Subheading'
|
|
112
|
+
assert result['No Content Heading.Subheading'] == '## Subheading'
|
|
113
|
+
|
|
114
|
+
def test_different_levels():
|
|
115
|
+
md_content = "### Level 3 Heading\nContent at level 3.\n# Level 1 Heading\nContent at level 1."
|
|
116
|
+
result = markdown_to_dict(md_content)
|
|
117
|
+
assert 'Level 3 Heading' in result
|
|
118
|
+
assert 'Level 1 Heading' in result
|
|
119
|
+
assert result['Level 3 Heading'] == '### Level 3 Heading\nContent at level 3.'
|
|
120
|
+
assert result['Level 1 Heading'] == '# Level 1 Heading\nContent at level 1.'
|
|
121
|
+
|
|
122
|
+
def test_parent_includes_subheadings():
|
|
123
|
+
md_content = "# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content."
|
|
124
|
+
result = markdown_to_dict(md_content)
|
|
125
|
+
assert result['Parent'] == '# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content.'
|
|
126
|
+
assert result['Parent.Child'] == '## Child\nChild content.\n### Grandchild\nGrandchild content.'
|
|
127
|
+
assert result['Parent.Child.Grandchild'] == '### Grandchild\nGrandchild content.'
|
|
128
|
+
|
|
129
|
+
test_empty_content()
|
|
130
|
+
test_special_characters()
|
|
131
|
+
test_duplicate_headings()
|
|
132
|
+
test_no_content()
|
|
133
|
+
test_different_levels()
|
|
134
|
+
test_parent_includes_subheadings()
|
|
135
|
+
print('tests passed')
|
|
136
|
+
|
toolslm/xml.py
CHANGED
|
@@ -50,40 +50,32 @@ def mk_doctype(content:str, # The document content
|
|
|
50
50
|
if source is None: source = hashlib.md5(content.encode()).hexdigest()[:8]
|
|
51
51
|
return doctype(_add_nls(str(source).strip()), _add_nls(content.strip()))
|
|
52
52
|
|
|
53
|
-
# %% ../00_xml.ipynb
|
|
53
|
+
# %% ../00_xml.ipynb 16
|
|
54
54
|
def mk_doc(index:int, # The document index
|
|
55
55
|
content:str, # The document content
|
|
56
|
-
source:Optional[str]=None # URL, filename, etc; defaults to `md5(content)` if not provided
|
|
57
|
-
|
|
58
|
-
"Create an `ft` format tuple for a single doc in Anthropic's recommended format"
|
|
59
|
-
dt = mk_doctype(content, source)
|
|
60
|
-
content = ft('document_content', dt.content)
|
|
61
|
-
source = ft('source', dt.source)
|
|
62
|
-
return ft('document', source, content, index=index)
|
|
63
|
-
|
|
64
|
-
# %% ../00_xml.ipynb 18
|
|
65
|
-
def mk_doc(index:int, # The document index
|
|
66
|
-
content:str, # The document content
|
|
67
|
-
source:Optional[str]=None # URL, filename, etc; defaults to `md5(content)` if not provided
|
|
56
|
+
source:Optional[str]=None, # URL, filename, etc; defaults to `md5(content)` if not provided
|
|
57
|
+
**kwargs
|
|
68
58
|
) -> tuple:
|
|
69
59
|
"Create an `ft` format tuple for a single doc in Anthropic's recommended format"
|
|
70
60
|
dt = mk_doctype(content, source)
|
|
71
61
|
content = Document_content(dt.content)
|
|
72
62
|
source = Source(dt.source)
|
|
73
|
-
return Document(source, content, index=index)
|
|
63
|
+
return Document(source, content, index=index, **kwargs)
|
|
74
64
|
|
|
75
|
-
# %% ../00_xml.ipynb
|
|
65
|
+
# %% ../00_xml.ipynb 19
|
|
76
66
|
def docs_xml(docs:list[str], # The content of each document
|
|
77
67
|
sources:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided
|
|
78
|
-
prefix:bool=True # Include Anthropic's suggested prose intro?
|
|
68
|
+
prefix:bool=True, # Include Anthropic's suggested prose intro?
|
|
69
|
+
details:Optional[list]=None # Optional list of dicts with additional attrs for each doc
|
|
79
70
|
)->str:
|
|
80
71
|
"Create an XML string containing `docs` in Anthropic's recommended format"
|
|
81
72
|
pre = 'Here are some documents for you to reference for your task:\n\n' if prefix else ''
|
|
82
73
|
if sources is None: sources = [None]*len(docs)
|
|
83
|
-
|
|
74
|
+
if details is None: details = [{}]*len(docs)
|
|
75
|
+
docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,sources,details)))
|
|
84
76
|
return pre + to_xml(Documents(docs))
|
|
85
77
|
|
|
86
|
-
# %% ../00_xml.ipynb
|
|
78
|
+
# %% ../00_xml.ipynb 26
|
|
87
79
|
def files2ctx(
|
|
88
80
|
fnames:list[Union[str,Path]], # List of file names to add to context
|
|
89
81
|
prefix:bool=True # Include Anthropic's suggested prose intro?
|
|
@@ -92,7 +84,7 @@ def files2ctx(
|
|
|
92
84
|
contents = [o.read_text() for o in fnames]
|
|
93
85
|
return docs_xml(contents, fnames, prefix=prefix)
|
|
94
86
|
|
|
95
|
-
# %% ../00_xml.ipynb
|
|
87
|
+
# %% ../00_xml.ipynb 29
|
|
96
88
|
@delegates(globtastic)
|
|
97
89
|
def folder2ctx(
|
|
98
90
|
folder:Union[str,Path], # Folder name containing files to add to context
|
|
@@ -102,7 +94,7 @@ def folder2ctx(
|
|
|
102
94
|
fnames = globtastic(folder, **kwargs)
|
|
103
95
|
return files2ctx(fnames, prefix=prefix)
|
|
104
96
|
|
|
105
|
-
# %% ../00_xml.ipynb
|
|
97
|
+
# %% ../00_xml.ipynb 31
|
|
106
98
|
@call_parse
|
|
107
99
|
@delegates(folder2ctx)
|
|
108
100
|
def folder2ctx_cli(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: toolslm
|
|
3
|
-
Version: 0.0.5
|
|
3
|
+
Version: 0.0.7
|
|
4
4
|
Summary: Tools to make language models a bit easier to use
|
|
5
5
|
Home-page: https://github.com/AnswerDotAI/toolslm
|
|
6
6
|
Author: Jeremy Howard
|
|
@@ -17,6 +17,10 @@ Requires-Python: >=3.9
|
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
19
|
Requires-Dist: fastcore >=1.5.47
|
|
20
|
+
Requires-Dist: beautifulsoup4
|
|
21
|
+
Requires-Dist: html2text
|
|
22
|
+
Requires-Dist: httpx
|
|
23
|
+
Requires-Dist: llms-txt
|
|
20
24
|
Provides-Extra: dev
|
|
21
25
|
|
|
22
26
|
# toolslm
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
toolslm/__init__.py,sha256=R9xOYoYrWKcfO5zvTeGC3m_eDNOvxMd8CocQs2tLufo,22
|
|
2
|
+
toolslm/_modidx.py,sha256=EIl2FBWhcZUS46r1AU0wURYg2O6Z3aXTPUr3p8Smrqk,3882
|
|
3
|
+
toolslm/download.py,sha256=tf0TGFzJ6qbxCjjuG9iRC2i6lutcF9GviWY0fJc_lSU,4378
|
|
4
|
+
toolslm/funccall.py,sha256=hSvBvfMv-YcBSUUs4-NrYu1f8jg4gfu2s82cPyIHVkU,6534
|
|
5
|
+
toolslm/md_hier.py,sha256=hkCjuOfIFWuMEiM2_XCoD9QIBjy9huLOSvpX_bMdn0Y,4645
|
|
6
|
+
toolslm/shell.py,sha256=GVqfL74NHw66zzZ7jvGVLjE55ZNJGBPvEb8kLz4aoYc,1576
|
|
7
|
+
toolslm/xml.py,sha256=Alcd96KfNO8LklVefyc51LbXBoVLRSgifrpMVZPqYsc,4120
|
|
8
|
+
toolslm-0.0.7.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
9
|
+
toolslm-0.0.7.dist-info/METADATA,sha256=sdRs3kCMl1xI8Z1if4xsGWuGaX9hbYGB0zs0BbRhQp0,3882
|
|
10
|
+
toolslm-0.0.7.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
11
|
+
toolslm-0.0.7.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
|
|
12
|
+
toolslm-0.0.7.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
|
|
13
|
+
toolslm-0.0.7.dist-info/RECORD,,
|
toolslm-0.0.5.dist-info/RECORD
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
toolslm/__init__.py,sha256=S7u1lbuWmM3A3ajykBialmPoJUK6Jg-WmNqM-9OZFdk,22
|
|
2
|
-
toolslm/_modidx.py,sha256=6T36Q2cYKH0lp9Tt9Us8xpZV-Z0FYqrtZGu2ZykHDkg,2068
|
|
3
|
-
toolslm/funccall.py,sha256=mzWNLdZY6cYk-I3O5noRiEB089mPwJhnRQFsS5_JYDs,3856
|
|
4
|
-
toolslm/shell.py,sha256=GVqfL74NHw66zzZ7jvGVLjE55ZNJGBPvEb8kLz4aoYc,1576
|
|
5
|
-
toolslm/xml.py,sha256=3rMyYK9VOvY3NElSNEoMGIe4iM8InKM-gbvjrK-2Ub0,4421
|
|
6
|
-
toolslm-0.0.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
7
|
-
toolslm-0.0.5.dist-info/METADATA,sha256=cJmpD5wO6AO62izboiKw8KeifsGy9duhZnJW_IQSXDg,3782
|
|
8
|
-
toolslm-0.0.5.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
9
|
-
toolslm-0.0.5.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
|
|
10
|
-
toolslm-0.0.5.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
|
|
11
|
-
toolslm-0.0.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|