toolslm 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toolslm/__init__.py +1 -1
- toolslm/_modidx.py +10 -1
- toolslm/download.py +97 -0
- toolslm/funccall.py +5 -4
- toolslm/xml.py +13 -20
- {toolslm-0.0.4.dist-info → toolslm-0.0.6.dist-info}/METADATA +5 -1
- toolslm-0.0.6.dist-info/RECORD +12 -0
- toolslm-0.0.4.dist-info/RECORD +0 -11
- {toolslm-0.0.4.dist-info → toolslm-0.0.6.dist-info}/LICENSE +0 -0
- {toolslm-0.0.4.dist-info → toolslm-0.0.6.dist-info}/WHEEL +0 -0
- {toolslm-0.0.4.dist-info → toolslm-0.0.6.dist-info}/entry_points.txt +0 -0
- {toolslm-0.0.4.dist-info → toolslm-0.0.6.dist-info}/top_level.txt +0 -0
toolslm/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.0.4"
|
|
1
|
+
__version__ = "0.0.6"
|
toolslm/_modidx.py
CHANGED
|
@@ -5,7 +5,16 @@ d = { 'settings': { 'branch': 'main',
|
|
|
5
5
|
'doc_host': 'https://AnswerDotAI.github.io',
|
|
6
6
|
'git_url': 'https://github.com/AnswerDotAI/toolslm',
|
|
7
7
|
'lib_path': 'toolslm'},
|
|
8
|
-
'syms': { 'toolslm.funccall': { 'toolslm.funccall._copy_loc': ('funccall.html#_copy_loc', 'toolslm/funccall.py'),
|
|
8
|
+
'syms': { 'toolslm.download': { 'toolslm.download._tryget': ('download.html#_tryget', 'toolslm/download.py'),
|
|
9
|
+
'toolslm.download.clean_md': ('download.html#clean_md', 'toolslm/download.py'),
|
|
10
|
+
'toolslm.download.find_docs': ('download.html#find_docs', 'toolslm/download.py'),
|
|
11
|
+
'toolslm.download.get_llmstxt': ('download.html#get_llmstxt', 'toolslm/download.py'),
|
|
12
|
+
'toolslm.download.html2md': ('download.html#html2md', 'toolslm/download.py'),
|
|
13
|
+
'toolslm.download.read_docs': ('download.html#read_docs', 'toolslm/download.py'),
|
|
14
|
+
'toolslm.download.read_html': ('download.html#read_html', 'toolslm/download.py'),
|
|
15
|
+
'toolslm.download.read_md': ('download.html#read_md', 'toolslm/download.py'),
|
|
16
|
+
'toolslm.download.split_url': ('download.html#split_url', 'toolslm/download.py')},
|
|
17
|
+
'toolslm.funccall': { 'toolslm.funccall._copy_loc': ('funccall.html#_copy_loc', 'toolslm/funccall.py'),
|
|
9
18
|
'toolslm.funccall._param': ('funccall.html#_param', 'toolslm/funccall.py'),
|
|
10
19
|
'toolslm.funccall._run': ('funccall.html#_run', 'toolslm/funccall.py'),
|
|
11
20
|
'toolslm.funccall._types': ('funccall.html#_types', 'toolslm/funccall.py'),
|
toolslm/download.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../03_download.ipynb.
|
|
2
|
+
|
|
3
|
+
# %% auto 0
|
|
4
|
+
__all__ = ['clean_md', 'read_md', 'html2md', 'read_html', 'get_llmstxt', 'split_url', 'find_docs', 'read_docs']
|
|
5
|
+
|
|
6
|
+
# %% ../03_download.ipynb 2
|
|
7
|
+
from fastcore.utils import *
|
|
8
|
+
from httpx import get
|
|
9
|
+
from fastcore.meta import delegates
|
|
10
|
+
from llms_txt import *
|
|
11
|
+
|
|
12
|
+
from html2text import HTML2Text
|
|
13
|
+
from bs4 import BeautifulSoup
|
|
14
|
+
from urllib.parse import urlparse, urljoin
|
|
15
|
+
|
|
16
|
+
# %% ../03_download.ipynb 4
|
|
17
|
+
def clean_md(text, rm_comments=True, rm_details=True):
|
|
18
|
+
"Remove comments and `<details>` sections from `text`"
|
|
19
|
+
if rm_comments: text = re.sub(r'\n?<!--.*?-->\n?', '', text, flags=re.DOTALL)
|
|
20
|
+
if rm_details: text = re.sub(r'\n?<details>.*?</details>\n?', '', text, flags=re.DOTALL)
|
|
21
|
+
return text
|
|
22
|
+
|
|
23
|
+
# %% ../03_download.ipynb 5
|
|
24
|
+
@delegates(get)
|
|
25
|
+
def read_md(url, rm_comments=True, rm_details=True, **kwargs):
|
|
26
|
+
"Read text from `url` and clean with `clean_docs`"
|
|
27
|
+
return clean_md(get(url, **kwargs).text, rm_comments=rm_comments, rm_details=rm_details)
|
|
28
|
+
|
|
29
|
+
# %% ../03_download.ipynb 7
|
|
30
|
+
def html2md(s:str):
|
|
31
|
+
"Convert `s` from HTML to markdown"
|
|
32
|
+
o = HTML2Text(bodywidth=5000)
|
|
33
|
+
o.ignore_links = True
|
|
34
|
+
o.mark_code = True
|
|
35
|
+
o.ignore_images = True
|
|
36
|
+
return o.handle(s)
|
|
37
|
+
|
|
38
|
+
# %% ../03_download.ipynb 8
|
|
39
|
+
def read_html(url, sel=None, rm_comments=True, rm_details=True):
|
|
40
|
+
"Get `url`, optionally selecting CSS selector `sel`, and convert to clean markdown"
|
|
41
|
+
page = get(url).text
|
|
42
|
+
if sel:
|
|
43
|
+
soup = BeautifulSoup(page, 'html.parser')
|
|
44
|
+
page = str(soup.find(sel))
|
|
45
|
+
md = html2md(page)
|
|
46
|
+
return clean_md(md, rm_comments, rm_details=rm_details)
|
|
47
|
+
|
|
48
|
+
# %% ../03_download.ipynb 10
|
|
49
|
+
def get_llmstxt(url, optional=False, n_workers=None):
|
|
50
|
+
"Get llms.txt file from and expand it with `llms_txt.create_ctx()`"
|
|
51
|
+
if not url.endswith('llms.txt'): return None
|
|
52
|
+
resp = get(url)
|
|
53
|
+
if resp.status_code!=200: return None
|
|
54
|
+
return create_ctx(resp.text, optional=optional, n_workers=n_workers)
|
|
55
|
+
|
|
56
|
+
# %% ../03_download.ipynb 12
|
|
57
|
+
def split_url(url):
|
|
58
|
+
"Split `url` into base, path, and file name, normalising name to '/' if empty"
|
|
59
|
+
parsed = urlparse(url.strip('/'))
|
|
60
|
+
base = f"{parsed.scheme}://{parsed.netloc}"
|
|
61
|
+
path,spl,fname = parsed.path.rpartition('/')
|
|
62
|
+
fname = spl+fname
|
|
63
|
+
if not path and not fname: path='/'
|
|
64
|
+
return base,path,fname
|
|
65
|
+
|
|
66
|
+
# %% ../03_download.ipynb 14
|
|
67
|
+
def _tryget(url):
|
|
68
|
+
"Return response from `url` if `status_code!=404`, otherwise `None`"
|
|
69
|
+
res = get(url)
|
|
70
|
+
return None if res.status_code==404 else url
|
|
71
|
+
|
|
72
|
+
# %% ../03_download.ipynb 15
|
|
73
|
+
def find_docs(url):
|
|
74
|
+
"If available, return LLM-friendly llms.txt context or markdown file location from `url`"
|
|
75
|
+
base,path,fname = split_url(url)
|
|
76
|
+
url = (base+path+fname).strip('/')
|
|
77
|
+
if fname=='/llms.txt': return url
|
|
78
|
+
if Path(fname).suffix in('.md', '.txt', '.rst'): return _tryget(url)
|
|
79
|
+
if '.' in fname: return _tryget(url+'.md')
|
|
80
|
+
res = _tryget(url+'/llms.txt')
|
|
81
|
+
if res: return res
|
|
82
|
+
res = _tryget(url+'/index.md')
|
|
83
|
+
if res: return res
|
|
84
|
+
res = _tryget(url+'/index.html.md')
|
|
85
|
+
if res: return res
|
|
86
|
+
res = _tryget(url+'/index-commonmark.md')
|
|
87
|
+
if res: return res
|
|
88
|
+
return None
|
|
89
|
+
|
|
90
|
+
# %% ../03_download.ipynb 19
|
|
91
|
+
def read_docs(url, optional=False, n_workers=None, rm_comments=True, rm_details=True):
|
|
92
|
+
"If available, return LLM-friendly llms.txt context or markdown file response for `url`"
|
|
93
|
+
url = find_docs(url)
|
|
94
|
+
if not url: return
|
|
95
|
+
if url.endswith('/llms.txt'): res = get_llmstxt(url, optional=optional, n_workers=n_workers)
|
|
96
|
+
else: res = get(url).text
|
|
97
|
+
return clean_md(res, rm_comments=rm_comments, rm_details=rm_details)
|
toolslm/funccall.py
CHANGED
|
@@ -16,6 +16,7 @@ def _types(t:type)->tuple[str,Optional[str]]:
|
|
|
16
16
|
"Tuple of json schema type name and (if appropriate) array item name."
|
|
17
17
|
if t is empty: raise TypeError('Missing type')
|
|
18
18
|
tmap = {int:"integer", float:"number", str:"string", bool:"boolean", list:"array", dict:"object"}
|
|
19
|
+
tmap.update({k.__name__: v for k, v in tmap.items()})
|
|
19
20
|
if getattr(t, '__origin__', None) in (list,tuple): return "array", tmap.get(t.__args__[0], "object")
|
|
20
21
|
else: return tmap[t], None
|
|
21
22
|
|
|
@@ -45,11 +46,11 @@ def get_schema(f:callable, pname='input_schema')->dict:
|
|
|
45
46
|
if ret.docment: desc += f'\n- description: {ret.docment}'
|
|
46
47
|
return {'name':f.__name__, 'description':desc, pname:paramd}
|
|
47
48
|
|
|
48
|
-
# %% ../01_funccall.ipynb
|
|
49
|
+
# %% ../01_funccall.ipynb 24
|
|
49
50
|
import ast, time, signal, traceback
|
|
50
51
|
from fastcore.utils import *
|
|
51
52
|
|
|
52
|
-
# %% ../01_funccall.ipynb
|
|
53
|
+
# %% ../01_funccall.ipynb 25
|
|
53
54
|
def _copy_loc(new, orig):
|
|
54
55
|
"Copy location information from original node to new node and all children."
|
|
55
56
|
new = ast.copy_location(new, orig)
|
|
@@ -58,7 +59,7 @@ def _copy_loc(new, orig):
|
|
|
58
59
|
elif isinstance(o, list): setattr(new, field, [_copy_loc(value, orig) for value in o])
|
|
59
60
|
return new
|
|
60
61
|
|
|
61
|
-
# %% ../01_funccall.ipynb
|
|
62
|
+
# %% ../01_funccall.ipynb 27
|
|
62
63
|
def _run(code:str ):
|
|
63
64
|
"Run `code`, returning final expression (similar to IPython)"
|
|
64
65
|
tree = ast.parse(code)
|
|
@@ -81,7 +82,7 @@ def _run(code:str ):
|
|
|
81
82
|
if _result is not None: return _result
|
|
82
83
|
return stdout_buffer.getvalue().strip()
|
|
83
84
|
|
|
84
|
-
# %% ../01_funccall.ipynb
|
|
85
|
+
# %% ../01_funccall.ipynb 32
|
|
85
86
|
def python(code, # Code to execute
|
|
86
87
|
timeout=5 # Maximum run time in seconds before a `TimeoutError` is raised
|
|
87
88
|
): # Result of last node, if it's an expression, or `None` otherwise
|
toolslm/xml.py
CHANGED
|
@@ -37,6 +37,7 @@ doctype = namedtuple('doctype', ['source', 'content'])
|
|
|
37
37
|
# %% ../00_xml.ipynb 11
|
|
38
38
|
def _add_nls(s):
|
|
39
39
|
"Add newlines to start and end of `s` if missing"
|
|
40
|
+
if not s: return s
|
|
40
41
|
if s[ 0]!='\n': s = '\n'+s
|
|
41
42
|
if s[-1]!='\n': s = s+'\n'
|
|
42
43
|
return s
|
|
@@ -49,40 +50,32 @@ def mk_doctype(content:str, # The document content
|
|
|
49
50
|
if source is None: source = hashlib.md5(content.encode()).hexdigest()[:8]
|
|
50
51
|
return doctype(_add_nls(str(source).strip()), _add_nls(content.strip()))
|
|
51
52
|
|
|
52
|
-
# %% ../00_xml.ipynb
|
|
53
|
+
# %% ../00_xml.ipynb 16
|
|
53
54
|
def mk_doc(index:int, # The document index
|
|
54
55
|
content:str, # The document content
|
|
55
|
-
source:Optional[str]=None # URL, filename, etc; defaults to `md5(content)` if not provided
|
|
56
|
-
|
|
57
|
-
"Create an `ft` format tuple for a single doc in Anthropic's recommended format"
|
|
58
|
-
dt = mk_doctype(content, source)
|
|
59
|
-
content = ft('document_content', dt.content)
|
|
60
|
-
source = ft('source', dt.source)
|
|
61
|
-
return ft('document', source, content, index=index)
|
|
62
|
-
|
|
63
|
-
# %% ../00_xml.ipynb 18
|
|
64
|
-
def mk_doc(index:int, # The document index
|
|
65
|
-
content:str, # The document content
|
|
66
|
-
source:Optional[str]=None # URL, filename, etc; defaults to `md5(content)` if not provided
|
|
56
|
+
source:Optional[str]=None, # URL, filename, etc; defaults to `md5(content)` if not provided
|
|
57
|
+
**kwargs
|
|
67
58
|
) -> tuple:
|
|
68
59
|
"Create an `ft` format tuple for a single doc in Anthropic's recommended format"
|
|
69
60
|
dt = mk_doctype(content, source)
|
|
70
61
|
content = Document_content(dt.content)
|
|
71
62
|
source = Source(dt.source)
|
|
72
|
-
return Document(source, content, index=index)
|
|
63
|
+
return Document(source, content, index=index, **kwargs)
|
|
73
64
|
|
|
74
|
-
# %% ../00_xml.ipynb
|
|
65
|
+
# %% ../00_xml.ipynb 19
|
|
75
66
|
def docs_xml(docs:list[str], # The content of each document
|
|
76
67
|
sources:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided
|
|
77
|
-
prefix:bool=True # Include Anthropic's suggested prose intro?
|
|
68
|
+
prefix:bool=True, # Include Anthropic's suggested prose intro?
|
|
69
|
+
details:Optional[list]=None # Optional list of dicts with additional attrs for each doc
|
|
78
70
|
)->str:
|
|
79
71
|
"Create an XML string containing `docs` in Anthropic's recommended format"
|
|
80
72
|
pre = 'Here are some documents for you to reference for your task:\n\n' if prefix else ''
|
|
81
73
|
if sources is None: sources = [None]*len(docs)
|
|
82
|
-
|
|
74
|
+
if details is None: details = [{}]*len(docs)
|
|
75
|
+
docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,sources,details)))
|
|
83
76
|
return pre + to_xml(Documents(docs))
|
|
84
77
|
|
|
85
|
-
# %% ../00_xml.ipynb
|
|
78
|
+
# %% ../00_xml.ipynb 26
|
|
86
79
|
def files2ctx(
|
|
87
80
|
fnames:list[Union[str,Path]], # List of file names to add to context
|
|
88
81
|
prefix:bool=True # Include Anthropic's suggested prose intro?
|
|
@@ -91,7 +84,7 @@ def files2ctx(
|
|
|
91
84
|
contents = [o.read_text() for o in fnames]
|
|
92
85
|
return docs_xml(contents, fnames, prefix=prefix)
|
|
93
86
|
|
|
94
|
-
# %% ../00_xml.ipynb
|
|
87
|
+
# %% ../00_xml.ipynb 29
|
|
95
88
|
@delegates(globtastic)
|
|
96
89
|
def folder2ctx(
|
|
97
90
|
folder:Union[str,Path], # Folder name containing files to add to context
|
|
@@ -101,7 +94,7 @@ def folder2ctx(
|
|
|
101
94
|
fnames = globtastic(folder, **kwargs)
|
|
102
95
|
return files2ctx(fnames, prefix=prefix)
|
|
103
96
|
|
|
104
|
-
# %% ../00_xml.ipynb
|
|
97
|
+
# %% ../00_xml.ipynb 31
|
|
105
98
|
@call_parse
|
|
106
99
|
@delegates(folder2ctx)
|
|
107
100
|
def folder2ctx_cli(
|
|
{toolslm-0.0.4.dist-info → toolslm-0.0.6.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: toolslm
|
|
3
|
-
Version: 0.0.4
|
|
3
|
+
Version: 0.0.6
|
|
4
4
|
Summary: Tools to make language models a bit easier to use
|
|
5
5
|
Home-page: https://github.com/AnswerDotAI/toolslm
|
|
6
6
|
Author: Jeremy Howard
|
|
@@ -17,6 +17,10 @@ Requires-Python: >=3.9
|
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
19
|
Requires-Dist: fastcore >=1.5.47
|
|
20
|
+
Requires-Dist: beautifulsoup4
|
|
21
|
+
Requires-Dist: html2text
|
|
22
|
+
Requires-Dist: httpx
|
|
23
|
+
Requires-Dist: llms-txt
|
|
20
24
|
Provides-Extra: dev
|
|
21
25
|
|
|
22
26
|
# toolslm
|
|
toolslm-0.0.6.dist-info/RECORD
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
toolslm/__init__.py,sha256=QiiYsv0kcJaB8wCWyT-FnI2b6be87HA-CrrIUn8LQhg,22
|
|
2
|
+
toolslm/_modidx.py,sha256=FiHwMAAjvPdu7kN0pA1OJTJbUg0ddo0o12_C9JUFPDc,3103
|
|
3
|
+
toolslm/download.py,sha256=BIhmbDSxM__57tukac63iwPx5sXIfbjYp7gh_fhp4Gw,3621
|
|
4
|
+
toolslm/funccall.py,sha256=ZXfzhP0N5cex7n8QHuxDfUb0BJX1iI1inFb064LAGlc,3914
|
|
5
|
+
toolslm/shell.py,sha256=GVqfL74NHw66zzZ7jvGVLjE55ZNJGBPvEb8kLz4aoYc,1576
|
|
6
|
+
toolslm/xml.py,sha256=Alcd96KfNO8LklVefyc51LbXBoVLRSgifrpMVZPqYsc,4120
|
|
7
|
+
toolslm-0.0.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
8
|
+
toolslm-0.0.6.dist-info/METADATA,sha256=_J7uXsh_qRX_wVKxssQjFpRH7zZjAwC0av1J5UIfTdk,3882
|
|
9
|
+
toolslm-0.0.6.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
10
|
+
toolslm-0.0.6.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
|
|
11
|
+
toolslm-0.0.6.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
|
|
12
|
+
toolslm-0.0.6.dist-info/RECORD,,
|
toolslm-0.0.4.dist-info/RECORD
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
toolslm/__init__.py,sha256=1mptEzQihbdyqqzMgdns_j5ZGK9gz7hR2bsgA_TnjO4,22
|
|
2
|
-
toolslm/_modidx.py,sha256=6T36Q2cYKH0lp9Tt9Us8xpZV-Z0FYqrtZGu2ZykHDkg,2068
|
|
3
|
-
toolslm/funccall.py,sha256=mzWNLdZY6cYk-I3O5noRiEB089mPwJhnRQFsS5_JYDs,3856
|
|
4
|
-
toolslm/shell.py,sha256=GVqfL74NHw66zzZ7jvGVLjE55ZNJGBPvEb8kLz4aoYc,1576
|
|
5
|
-
toolslm/xml.py,sha256=dSJOHqSWnZlMK1Qf3396ISSaBHf5miNlLSYCixYB9ng,4398
|
|
6
|
-
toolslm-0.0.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
7
|
-
toolslm-0.0.4.dist-info/METADATA,sha256=9Ni6CdLgvxTCx7LqIrnNyUgrfu0t2Wsdabp9jZlFNvw,3782
|
|
8
|
-
toolslm-0.0.4.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
9
|
-
toolslm-0.0.4.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
|
|
10
|
-
toolslm-0.0.4.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
|
|
11
|
-
toolslm-0.0.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|