toolslm 0.0.5__tar.gz → 0.3.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toolslm-0.3.13/PKG-INFO +90 -0
- toolslm-0.3.13/README.md +55 -0
- toolslm-0.3.13/pyproject.toml +11 -0
- {toolslm-0.0.5 → toolslm-0.3.13}/settings.ini +5 -3
- toolslm-0.3.13/toolslm/__init__.py +1 -0
- toolslm-0.3.13/toolslm/_modidx.py +54 -0
- toolslm-0.3.13/toolslm/download.py +109 -0
- toolslm-0.3.13/toolslm/funccall.py +274 -0
- toolslm-0.3.13/toolslm/md_hier.py +307 -0
- {toolslm-0.0.5 → toolslm-0.3.13}/toolslm/shell.py +5 -5
- toolslm-0.3.13/toolslm/xml.py +214 -0
- toolslm-0.3.13/toolslm.egg-info/PKG-INFO +90 -0
- {toolslm-0.0.5 → toolslm-0.3.13}/toolslm.egg-info/SOURCES.txt +3 -0
- toolslm-0.3.13/toolslm.egg-info/requires.txt +4 -0
- toolslm-0.0.5/PKG-INFO +0 -150
- toolslm-0.0.5/README.md +0 -129
- toolslm-0.0.5/toolslm/__init__.py +0 -1
- toolslm-0.0.5/toolslm/_modidx.py +0 -24
- toolslm-0.0.5/toolslm/funccall.py +0 -95
- toolslm-0.0.5/toolslm/xml.py +0 -112
- toolslm-0.0.5/toolslm.egg-info/PKG-INFO +0 -150
- toolslm-0.0.5/toolslm.egg-info/requires.txt +0 -3
- {toolslm-0.0.5 → toolslm-0.3.13}/LICENSE +0 -0
- {toolslm-0.0.5 → toolslm-0.3.13}/MANIFEST.in +0 -0
- {toolslm-0.0.5 → toolslm-0.3.13}/setup.cfg +0 -0
- {toolslm-0.0.5 → toolslm-0.3.13}/setup.py +0 -0
- {toolslm-0.0.5 → toolslm-0.3.13}/toolslm.egg-info/dependency_links.txt +0 -0
- {toolslm-0.0.5 → toolslm-0.3.13}/toolslm.egg-info/entry_points.txt +0 -0
- {toolslm-0.0.5 → toolslm-0.3.13}/toolslm.egg-info/not-zip-safe +0 -0
- {toolslm-0.0.5 → toolslm-0.3.13}/toolslm.egg-info/top_level.txt +0 -0
toolslm-0.3.13/PKG-INFO
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: toolslm
|
|
3
|
+
Version: 0.3.13
|
|
4
|
+
Summary: Tools to make language models a bit easier to use
|
|
5
|
+
Home-page: https://github.com/AnswerDotAI/toolslm
|
|
6
|
+
Author: Jeremy Howard
|
|
7
|
+
Author-email: j@fast.ai
|
|
8
|
+
License: Apache Software License 2.0
|
|
9
|
+
Keywords: nbdev jupyter notebook python
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Natural Language :: English
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
16
|
+
Requires-Python: >=3.9
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: fastcore>=1.9.7
|
|
20
|
+
Requires-Dist: httpx
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Dynamic: author
|
|
23
|
+
Dynamic: author-email
|
|
24
|
+
Dynamic: classifier
|
|
25
|
+
Dynamic: description
|
|
26
|
+
Dynamic: description-content-type
|
|
27
|
+
Dynamic: home-page
|
|
28
|
+
Dynamic: keywords
|
|
29
|
+
Dynamic: license
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
Dynamic: provides-extra
|
|
32
|
+
Dynamic: requires-dist
|
|
33
|
+
Dynamic: requires-python
|
|
34
|
+
Dynamic: summary
|
|
35
|
+
|
|
36
|
+
# toolslm
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
40
|
+
|
|
41
|
+
This is a work in progress…
|
|
42
|
+
|
|
43
|
+
## Install
|
|
44
|
+
|
|
45
|
+
``` sh
|
|
46
|
+
pip install toolslm
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## How to use
|
|
50
|
+
|
|
51
|
+
### Context creation
|
|
52
|
+
|
|
53
|
+
toolslm has some helpers to make it easier to generate XML context from
|
|
54
|
+
files, for instance
|
|
55
|
+
[`folder2ctx`](https://AnswerDotAI.github.io/toolslm/xml.html#folder2ctx):
|
|
56
|
+
|
|
57
|
+
``` python
|
|
58
|
+
print(folder2ctx('samples', prefix=False, file_glob='*.py'))
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
<documents><document index="1"><src>
|
|
62
|
+
samples/sample_core.py
|
|
63
|
+
</src><document-content>
|
|
64
|
+
import inspect
|
|
65
|
+
empty = inspect.Parameter.empty
|
|
66
|
+
models = 'claude-3-opus-20240229','claude-3-sonnet-20240229','claude-3-haiku-20240307'
|
|
67
|
+
</document-content></document></documents>
|
|
68
|
+
|
|
69
|
+
JSON doesn’t map as nicely to XML as the `ft` data structure from
|
|
70
|
+
`fastcore.xml`, but for simple XML trees it can be convenient. The
|
|
71
|
+
[`json_to_xml`](https://AnswerDotAI.github.io/toolslm/xml.html#json_to_xml)
|
|
72
|
+
function handles that conversion:
|
|
73
|
+
|
|
74
|
+
``` python
|
|
75
|
+
a = dict(surname='Howard', firstnames=['Jeremy','Peter'],
|
|
76
|
+
address=dict(state='Queensland',country='Australia'))
|
|
77
|
+
print(json_to_xml(a, 'person'))
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
<person>
|
|
81
|
+
<surname>Howard</surname>
|
|
82
|
+
<firstnames>
|
|
83
|
+
<item>Jeremy</item>
|
|
84
|
+
<item>Peter</item>
|
|
85
|
+
</firstnames>
|
|
86
|
+
<address>
|
|
87
|
+
<state>Queensland</state>
|
|
88
|
+
<country>Australia</country>
|
|
89
|
+
</address>
|
|
90
|
+
</person>
|
toolslm-0.3.13/README.md
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# toolslm
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
5
|
+
|
|
6
|
+
This is a work in progress…
|
|
7
|
+
|
|
8
|
+
## Install
|
|
9
|
+
|
|
10
|
+
``` sh
|
|
11
|
+
pip install toolslm
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## How to use
|
|
15
|
+
|
|
16
|
+
### Context creation
|
|
17
|
+
|
|
18
|
+
toolslm has some helpers to make it easier to generate XML context from
|
|
19
|
+
files, for instance
|
|
20
|
+
[`folder2ctx`](https://AnswerDotAI.github.io/toolslm/xml.html#folder2ctx):
|
|
21
|
+
|
|
22
|
+
``` python
|
|
23
|
+
print(folder2ctx('samples', prefix=False, file_glob='*.py'))
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
<documents><document index="1"><src>
|
|
27
|
+
samples/sample_core.py
|
|
28
|
+
</src><document-content>
|
|
29
|
+
import inspect
|
|
30
|
+
empty = inspect.Parameter.empty
|
|
31
|
+
models = 'claude-3-opus-20240229','claude-3-sonnet-20240229','claude-3-haiku-20240307'
|
|
32
|
+
</document-content></document></documents>
|
|
33
|
+
|
|
34
|
+
JSON doesn’t map as nicely to XML as the `ft` data structure from
|
|
35
|
+
`fastcore.xml`, but for simple XML trees it can be convenient. The
|
|
36
|
+
[`json_to_xml`](https://AnswerDotAI.github.io/toolslm/xml.html#json_to_xml)
|
|
37
|
+
function handles that conversion:
|
|
38
|
+
|
|
39
|
+
``` python
|
|
40
|
+
a = dict(surname='Howard', firstnames=['Jeremy','Peter'],
|
|
41
|
+
address=dict(state='Queensland',country='Australia'))
|
|
42
|
+
print(json_to_xml(a, 'person'))
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
<person>
|
|
46
|
+
<surname>Howard</surname>
|
|
47
|
+
<firstnames>
|
|
48
|
+
<item>Jeremy</item>
|
|
49
|
+
<item>Peter</item>
|
|
50
|
+
</firstnames>
|
|
51
|
+
<address>
|
|
52
|
+
<state>Queensland</state>
|
|
53
|
+
<country>Australia</country>
|
|
54
|
+
</address>
|
|
55
|
+
</person>
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=64.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name="toolslm"
|
|
7
|
+
requires-python=">=3.9"
|
|
8
|
+
dynamic = [ "keywords", "description", "version", "dependencies", "optional-dependencies", "readme", "license", "authors", "classifiers", "entry-points", "scripts", "urls"]
|
|
9
|
+
|
|
10
|
+
[tool.uv]
|
|
11
|
+
cache-keys = [{ file = "pyproject.toml" }, { file = "settings.ini" }, { file = "setup.py" }]
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
[DEFAULT]
|
|
2
2
|
repo = toolslm
|
|
3
3
|
lib_name = toolslm
|
|
4
|
-
version = 0.0.5
|
|
4
|
+
version = 0.3.13
|
|
5
5
|
min_python = 3.9
|
|
6
6
|
license = apache2
|
|
7
7
|
black_formatting = False
|
|
8
|
-
requirements = fastcore>=1.
|
|
8
|
+
requirements = fastcore>=1.9.7 httpx
|
|
9
9
|
doc_path = _docs
|
|
10
10
|
lib_path = toolslm
|
|
11
11
|
nbs_path = .
|
|
@@ -35,5 +35,7 @@ clean_ids = True
|
|
|
35
35
|
clear_all = False
|
|
36
36
|
conda_user = fastai
|
|
37
37
|
console_scripts = folder2ctx=toolslm.xml:folder2ctx_cli
|
|
38
|
-
cell_number =
|
|
38
|
+
cell_number = False
|
|
39
|
+
skip_procs =
|
|
40
|
+
update_pyproject = True
|
|
39
41
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.3.13"
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# Autogenerated by nbdev
|
|
2
|
+
|
|
3
|
+
d = { 'settings': { 'branch': 'main',
|
|
4
|
+
'doc_baseurl': '/toolslm',
|
|
5
|
+
'doc_host': 'https://AnswerDotAI.github.io',
|
|
6
|
+
'git_url': 'https://github.com/AnswerDotAI/toolslm',
|
|
7
|
+
'lib_path': 'toolslm'},
|
|
8
|
+
'syms': { 'toolslm.download': { 'toolslm.download._tryget': ('download.html#_tryget', 'toolslm/download.py'),
|
|
9
|
+
'toolslm.download.clean_md': ('download.html#clean_md', 'toolslm/download.py'),
|
|
10
|
+
'toolslm.download.find_docs': ('download.html#find_docs', 'toolslm/download.py'),
|
|
11
|
+
'toolslm.download.get_llmstxt': ('download.html#get_llmstxt', 'toolslm/download.py'),
|
|
12
|
+
'toolslm.download.html2md': ('download.html#html2md', 'toolslm/download.py'),
|
|
13
|
+
'toolslm.download.read_docs': ('download.html#read_docs', 'toolslm/download.py'),
|
|
14
|
+
'toolslm.download.read_html': ('download.html#read_html', 'toolslm/download.py'),
|
|
15
|
+
'toolslm.download.read_md': ('download.html#read_md', 'toolslm/download.py'),
|
|
16
|
+
'toolslm.download.split_url': ('download.html#split_url', 'toolslm/download.py')},
|
|
17
|
+
'toolslm.funccall': { 'toolslm.funccall._coerce_inputs': ('funccall.html#_coerce_inputs', 'toolslm/funccall.py'),
|
|
18
|
+
'toolslm.funccall._copy_loc': ('funccall.html#_copy_loc', 'toolslm/funccall.py'),
|
|
19
|
+
'toolslm.funccall._get_nested_schema': ('funccall.html#_get_nested_schema', 'toolslm/funccall.py'),
|
|
20
|
+
'toolslm.funccall._handle_container': ('funccall.html#_handle_container', 'toolslm/funccall.py'),
|
|
21
|
+
'toolslm.funccall._handle_type': ('funccall.html#_handle_type', 'toolslm/funccall.py'),
|
|
22
|
+
'toolslm.funccall._is_container': ('funccall.html#_is_container', 'toolslm/funccall.py'),
|
|
23
|
+
'toolslm.funccall._is_parameterized': ('funccall.html#_is_parameterized', 'toolslm/funccall.py'),
|
|
24
|
+
'toolslm.funccall._param': ('funccall.html#_param', 'toolslm/funccall.py'),
|
|
25
|
+
'toolslm.funccall._process_property': ('funccall.html#_process_property', 'toolslm/funccall.py'),
|
|
26
|
+
'toolslm.funccall._run': ('funccall.html#_run', 'toolslm/funccall.py'),
|
|
27
|
+
'toolslm.funccall._types': ('funccall.html#_types', 'toolslm/funccall.py'),
|
|
28
|
+
'toolslm.funccall.call_func': ('funccall.html#call_func', 'toolslm/funccall.py'),
|
|
29
|
+
'toolslm.funccall.call_func_async': ('funccall.html#call_func_async', 'toolslm/funccall.py'),
|
|
30
|
+
'toolslm.funccall.get_schema': ('funccall.html#get_schema', 'toolslm/funccall.py'),
|
|
31
|
+
'toolslm.funccall.mk_ns': ('funccall.html#mk_ns', 'toolslm/funccall.py'),
|
|
32
|
+
'toolslm.funccall.mk_param': ('funccall.html#mk_param', 'toolslm/funccall.py'),
|
|
33
|
+
'toolslm.funccall.mk_tool': ('funccall.html#mk_tool', 'toolslm/funccall.py'),
|
|
34
|
+
'toolslm.funccall.python': ('funccall.html#python', 'toolslm/funccall.py'),
|
|
35
|
+
'toolslm.funccall.schema2sig': ('funccall.html#schema2sig', 'toolslm/funccall.py')},
|
|
36
|
+
'toolslm.md_hier': {},
|
|
37
|
+
'toolslm.shell': { 'toolslm.shell.TerminalInteractiveShell.run_cell': ( 'shell.html#terminalinteractiveshell.run_cell',
|
|
38
|
+
'toolslm/shell.py'),
|
|
39
|
+
'toolslm.shell.get_shell': ('shell.html#get_shell', 'toolslm/shell.py')},
|
|
40
|
+
'toolslm.xml': { 'toolslm.xml._add_nls': ('xml.html#_add_nls', 'toolslm/xml.py'),
|
|
41
|
+
'toolslm.xml.cell2out': ('xml.html#cell2out', 'toolslm/xml.py'),
|
|
42
|
+
'toolslm.xml.cell2xml': ('xml.html#cell2xml', 'toolslm/xml.py'),
|
|
43
|
+
'toolslm.xml.docs_xml': ('xml.html#docs_xml', 'toolslm/xml.py'),
|
|
44
|
+
'toolslm.xml.files2ctx': ('xml.html#files2ctx', 'toolslm/xml.py'),
|
|
45
|
+
'toolslm.xml.folder2ctx': ('xml.html#folder2ctx', 'toolslm/xml.py'),
|
|
46
|
+
'toolslm.xml.folder2ctx_cli': ('xml.html#folder2ctx_cli', 'toolslm/xml.py'),
|
|
47
|
+
'toolslm.xml.get_mime_text': ('xml.html#get_mime_text', 'toolslm/xml.py'),
|
|
48
|
+
'toolslm.xml.json_to_xml': ('xml.html#json_to_xml', 'toolslm/xml.py'),
|
|
49
|
+
'toolslm.xml.mk_doc': ('xml.html#mk_doc', 'toolslm/xml.py'),
|
|
50
|
+
'toolslm.xml.mk_doctype': ('xml.html#mk_doctype', 'toolslm/xml.py'),
|
|
51
|
+
'toolslm.xml.nb2xml': ('xml.html#nb2xml', 'toolslm/xml.py'),
|
|
52
|
+
'toolslm.xml.parse_gh_url': ('xml.html#parse_gh_url', 'toolslm/xml.py'),
|
|
53
|
+
'toolslm.xml.read_file': ('xml.html#read_file', 'toolslm/xml.py'),
|
|
54
|
+
'toolslm.xml.repo2ctx': ('xml.html#repo2ctx', 'toolslm/xml.py')}}}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../03_download.ipynb.
|
|
2
|
+
|
|
3
|
+
# %% auto 0
|
|
4
|
+
__all__ = ['clean_md', 'read_md', 'html2md', 'read_html', 'get_llmstxt', 'split_url', 'find_docs', 'read_docs']
|
|
5
|
+
|
|
6
|
+
# %% ../03_download.ipynb
|
|
7
|
+
from fastcore.utils import *
|
|
8
|
+
from httpx import get
|
|
9
|
+
from fastcore.meta import delegates
|
|
10
|
+
from urllib.parse import urlparse, urljoin
|
|
11
|
+
from .xml import parse_gh_url
|
|
12
|
+
|
|
13
|
+
# %% ../03_download.ipynb
|
|
14
|
+
def clean_md(text, rm_comments=True, rm_details=True):
    "Strip HTML comments and/or `<details>` blocks (plus one adjacent newline on each side) from `text`"
    patterns = []
    if rm_comments: patterns.append(r'\n?<!--.*?-->\n?')
    if rm_details: patterns.append(r'\n?<details>.*?</details>\n?')
    # DOTALL lets a comment or details block span several lines
    for pat in patterns: text = re.sub(pat, '', text, flags=re.DOTALL)
    return text
|
|
19
|
+
|
|
20
|
+
# %% ../03_download.ipynb
|
|
21
|
+
@delegates(get)
def read_md(url, rm_comments=True, rm_details=True, **kwargs):
    "Read text from `url` (extra `kwargs` are passed to `get`) and clean it with `clean_md`"
    return clean_md(get(url, **kwargs).text, rm_comments=rm_comments, rm_details=rm_details)
|
|
25
|
+
|
|
26
|
+
# %% ../03_download.ipynb
|
|
27
|
+
def html2md(s:str, ignore_links=True):
    "Render HTML string `s` as markdown with `html2text`, dropping images and (optionally) links"
    from html2text import HTML2Text
    conv = HTML2Text(bodywidth=5000)
    conv.ignore_images,conv.mark_code,conv.ignore_links = True,True,ignore_links
    return conv.handle(s)
|
|
35
|
+
|
|
36
|
+
# %% ../03_download.ipynb
|
|
37
|
+
def read_html(url, # URL to read
              sel=None, # Read only outerHTML of CSS selector `sel`
              rm_comments=True, # Removes HTML comments
              rm_details=True, # Removes `<details>` tags
              multi=False, # Get all matches to `sel` or first one
              wrap_tag=None, # If multi, each selection wrapped with <wrap_tag>content</wrap_tag>
              ignore_links=True, # Passed to `html2md`
             ): # Cleaned markdown
    "Get `url`, optionally selecting CSS selector `sel`, and convert to clean markdown"
    page = get(url).text
    if sel:
        from bs4 import BeautifulSoup
        soup = BeautifulSoup(page, 'html.parser')
        if multi:
            page = [str(el) for el in soup.select(sel)]
            # Without a wrapper tag the matches are converted as one joined document
            if not wrap_tag: page = "\n".join(page)
        else:
            el = soup.select_one(sel)
            # `select_one` gives `None` when nothing matches; don't convert the literal text "None"
            page = '' if el is None else str(el)
    mds = [clean_md(html2md(o, ignore_links=ignore_links), rm_comments, rm_details=rm_details) for o in tuplify(page)]
    if wrap_tag: return '\n'.join(f"\n<{wrap_tag}>\n{o}</{wrap_tag}>\n" for o in mds)
    return '\n'.join(mds)
|
|
57
|
+
|
|
58
|
+
# %% ../03_download.ipynb
|
|
59
|
+
def get_llmstxt(url, optional=False, n_workers=None):
    "Fetch the llms.txt file at `url` and expand it with `llms_txt.create_ctx()`; `None` if `url` does not end in 'llms.txt' or the status is not 200"
    if url.endswith('llms.txt'):
        import llms_txt
        r = get(url)
        if r.status_code==200: return llms_txt.create_ctx(r.text, optional=optional, n_workers=n_workers)
    return None
|
|
66
|
+
|
|
67
|
+
# %% ../03_download.ipynb
|
|
68
|
+
def split_url(url):
    "Split `url` into `(base, path, fname)`; `fname` keeps its leading '/', and `path` becomes '/' when the URL has no path at all"
    parts = urlparse(url.strip('/'))
    head,sep,tail = parts.path.rpartition('/')
    fname = sep+tail
    if not (head or fname): head = '/'
    return f"{parts.scheme}://{parts.netloc}", head, fname
|
|
76
|
+
|
|
77
|
+
# %% ../03_download.ipynb
|
|
78
|
+
def _tryget(url):
    "Return `url` itself if fetching it does not give a 404 status, otherwise `None`"
    return None if get(url).status_code==404 else url
|
|
82
|
+
|
|
83
|
+
# %% ../03_download.ipynb
|
|
84
|
+
def find_docs(url):
    "Return the URL of an llms.txt or markdown/text doc for `url`, retrying from shorter URLs when nothing is found, or `None`"
    base,path,fname = split_url(url)
    url = (base+path+fname).strip('/')
    if fname=='/llms.txt': return url
    if Path(fname).suffix in ('.md', '.txt', '.rst'): return _tryget(url)
    # Any other dotted name: try its `.md` twin, else retry with the last path segment removed
    if '.' in fname: return _tryget(url+'.md') or find_docs(url[:url.rfind('/')])
    for cand in ('/llms.txt', '/index.md', '/index.html.md', '/index-commonmark.md'):
        found = _tryget(url+cand)
        if found: return found
    pth = urlparse(url).path
    if not pth or pth == '/': return None
    return find_docs(urljoin(url, '..'))
|
|
102
|
+
|
|
103
|
+
# %% ../03_download.ipynb
|
|
104
|
+
def read_docs(url, optional=False, n_workers=None, rm_comments=True, rm_details=True):
    "If available, return LLM-friendly llms.txt context or markdown file response for `url`, otherwise `None`"
    url = find_docs(url)
    # `find_docs` gives `None` when no doc location exists
    if url is None: return None
    if url.endswith('/llms.txt'): res = get_llmstxt(url, optional=optional, n_workers=n_workers)
    else: res = get(url).text
    # `get_llmstxt` gives `None` when the file cannot be fetched with status 200
    if res is None: return None
    return clean_md(res, rm_comments=rm_comments, rm_details=rm_details)
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../01_funccall.ipynb.
|
|
2
|
+
|
|
3
|
+
# %% auto 0
|
|
4
|
+
__all__ = ['empty', 'custom_types', 'get_schema', 'python', 'mk_ns', 'call_func', 'call_func_async', 'mk_param', 'schema2sig',
|
|
5
|
+
'mk_tool']
|
|
6
|
+
|
|
7
|
+
# %% ../01_funccall.ipynb
|
|
8
|
+
import inspect, json, ast
|
|
9
|
+
from collections import abc
|
|
10
|
+
from fastcore.utils import *
|
|
11
|
+
from fastcore.docments import docments
|
|
12
|
+
from typing import get_origin, get_args, Dict, List, Optional, Tuple, Union, Any
|
|
13
|
+
from types import UnionType
|
|
14
|
+
from typing import get_type_hints
|
|
15
|
+
from inspect import Parameter, Signature
|
|
16
|
+
from decimal import Decimal
|
|
17
|
+
from uuid import UUID
|
|
18
|
+
|
|
19
|
+
# %% ../01_funccall.ipynb
|
|
20
|
+
# Sentinel for "no annotation / no default" (alias of `inspect.Parameter.empty`)
empty = inspect.Parameter.empty
|
|
21
|
+
|
|
22
|
+
# %% ../01_funccall.ipynb
|
|
23
|
+
def _types(t:type)->tuple[str,Optional[str]]:
|
|
24
|
+
"Tuple of json schema type name and (if appropriate) array item name."
|
|
25
|
+
if t is empty: raise TypeError('Missing type')
|
|
26
|
+
tmap = {int:"integer", float:"number", str:"string", bool:"boolean", list:"array", dict:"object"}
|
|
27
|
+
tmap.update({k.__name__: v for k, v in tmap.items()})
|
|
28
|
+
if getattr(t, '__origin__', None) in (list,tuple):
|
|
29
|
+
args = getattr(t, '__args__', None)
|
|
30
|
+
item_type = "object" if not args else tmap.get(t.__args__[0].__name__, "object")
|
|
31
|
+
return "array", item_type
|
|
32
|
+
# if t is a string like 'int', directly use the string as the key
|
|
33
|
+
elif isinstance(t, str): return tmap.get(t, "object"), None
|
|
34
|
+
# if t is the type itself and a container
|
|
35
|
+
elif get_origin(t): return tmap.get(get_origin(t).__name__, "object"), None
|
|
36
|
+
# if t is the type itself like int, use the __name__ representation as the key
|
|
37
|
+
else: return tmap.get(t.__name__, "object"), None
|
|
38
|
+
|
|
39
|
+
# %% ../01_funccall.ipynb
|
|
40
|
+
def _param(
    name, # param name
    info, # dict from docments
    evalable=False): # stringify defaults that can't be literal_eval'd?
    "json schema parameter given `name` and `info` from docments full dict"
    paramt,itemt = _types(info.anno)
    pschema = dict(type=paramt, description=info.docment or "")
    if itemt: pschema["items"] = {"type": itemt}
    if info.default is empty: return pschema
    default = info.default
    if evalable:
        # Keep the default only if its repr round-trips through `literal_eval`; otherwise store its string form
        try: ast.literal_eval(repr(default))
        except Exception: default = str(default)
    pschema["default"] = default
    return pschema
|
|
55
|
+
|
|
56
|
+
# %% ../01_funccall.ipynb
|
|
57
|
+
# Types emitted in schemas as strings whose `format` is the type name; `_coerce_inputs` calls the type on the incoming value
custom_types = {Path, bytes, Decimal, UUID}

def _handle_type(t, defs):
    "Schema fragment for a single type `t`; schemas of nested classes/functions are stored in `defs` and referenced by `$ref`"
    if t is NoneType: return {'type': 'null'}
    if t in custom_types: return {'type':'string', 'format':t.__name__}
    if t not in (dict, list, tuple, set):
        is_custom_cls = isinstance(t, type) and not issubclass(t, (int, float, str, bool))
        if is_custom_cls or inspect.isfunction(t):
            nm = t.__name__
            defs[nm] = _get_nested_schema(t)
            return {'$ref': f'#/$defs/{nm}'}
    return {'type': _types(t)[0]}
|
|
68
|
+
|
|
69
|
+
# %% ../01_funccall.ipynb
|
|
70
|
+
def _is_container(t):
|
|
71
|
+
"Check if type is a container (list, dict, tuple, set, Union)"
|
|
72
|
+
origin = get_origin(t)
|
|
73
|
+
return origin in (list, dict, tuple, set, Union) if origin else False
|
|
74
|
+
|
|
75
|
+
def _is_parameterized(t):
    "True when `t` is a container type that carries type arguments (e.g. list[int] but not list)"
    if not _is_container(t): return False
    return get_args(t) != ()
|
|
78
|
+
|
|
79
|
+
# %% ../01_funccall.ipynb
|
|
80
|
+
def _handle_container(origin, args, defs):
    "Schema fragment for a parameterized container with `origin` and type `args`, or `None` if `origin` is not handled"
    if origin is Union or origin is UnionType: return {"anyOf": [_handle_type(a, defs) for a in args]}
    if origin is dict:
        val = args[1]
        value_type = val.__args__[0] if hasattr(val, '__args__') else val
        # NOTE(review): any value type exposing `__origin__` is emitted as an array of its first type argument;
        # that looks intended for list-like values -- confirm the behaviour wanted for e.g. dict-valued dicts
        if hasattr(val, '__origin__'): addl = {'type': 'array', 'items': _handle_type(value_type, defs)}
        else: addl = _handle_type(val, defs)
        return {'type': 'object', 'additionalProperties': addl}
    if origin in (list, tuple, set):
        res = {'type': 'array', 'items': _handle_type(args[0], defs)}
        if origin is set: res['uniqueItems'] = True
        return res
    return None
|
|
99
|
+
|
|
100
|
+
# %% ../01_funccall.ipynb
|
|
101
|
+
def _process_property(name, obj, props, req, defs, evalable=False):
    "Add the schema for parameter `name` to `props`, mark it in `req` if it has no default, and collect nested schemas in `defs`"
    anno = obj.anno
    prop = props[name] = _param(name, obj, evalable=evalable)
    if obj.default is empty: req[name] = True
    if _is_container(anno) and _is_parameterized(anno): extra = _handle_container(get_origin(anno), get_args(anno), defs)
    else: extra = _handle_type(anno, defs)  # Non-container type or container without arguments
    prop.update(extra)
|
|
112
|
+
|
|
113
|
+
# %% ../01_funccall.ipynb
|
|
114
|
+
def _get_nested_schema(obj, evalable=False, skip_hidden=False):
    "JSON schema (`type`, `properties`, and where applicable `title`, `required`, `$defs`) for the parameters of class or function `obj`"
    props, req, defs = {}, {}, {}
    for nm, info in docments(obj, full=True).items():
        if nm in ('return', 'self'): continue
        if skip_hidden and nm.startswith('_'): continue
        _process_property(nm, info, props, req, defs, evalable=evalable)
    schema = {'type': 'object', 'properties': props}
    # Only classes get a `title`
    if isinstance(obj, type): schema['title'] = obj.__name__
    if req: schema['required'] = list(req)
    if defs: schema['$defs'] = defs
    return schema
|
|
129
|
+
|
|
130
|
+
# %% ../01_funccall.ipynb
|
|
131
|
+
def get_schema(
    f:Union[callable,dict], # Function to get schema for
    pname='input_schema', # Key name for parameters
    evalable=False, # stringify defaults that can't be literal_eval'd?
    skip_hidden=False # skip parameters starting with '_'?
)->dict: # {'name':..., 'description':..., pname:...}
    "Generate JSON schema for a class, function, or method; a dict `f` is returned unchanged"
    if isinstance(f, dict): return f
    schema = _get_nested_schema(f, evalable=evalable, skip_hidden=skip_hidden)
    desc = f.__doc__
    assert desc, "Docstring missing!"
    ret_anno = docments(f, full=True).pop('return').anno
    # Append the return type to the description when the function declares one
    if not (ret_anno is empty or ret_anno is None):
        ret_type = _types(ret_anno)[0]
        desc += f'\n\nReturns:\n- type: {ret_type}'
    return {"name": f.__name__, "description": desc, pname: schema}
|
|
146
|
+
|
|
147
|
+
# %% ../01_funccall.ipynb
|
|
148
|
+
import ast, time, signal, traceback
|
|
149
|
+
from fastcore.utils import *
|
|
150
|
+
|
|
151
|
+
# %% ../01_funccall.ipynb
|
|
152
|
+
def _copy_loc(new, orig):
|
|
153
|
+
"Copy location information from original node to new node and all children."
|
|
154
|
+
new = ast.copy_location(new, orig)
|
|
155
|
+
for field, o in ast.iter_fields(new):
|
|
156
|
+
if isinstance(o, ast.AST): setattr(new, field, _copy_loc(o, orig))
|
|
157
|
+
elif isinstance(o, list): setattr(new, field, [_copy_loc(value, orig) for value in o])
|
|
158
|
+
return new
|
|
159
|
+
|
|
160
|
+
# %% ../01_funccall.ipynb
|
|
161
|
+
def _run(code:str, glb:dict=None, loc:dict=None):
|
|
162
|
+
"Run `code`, returning final expression (similar to IPython)"
|
|
163
|
+
tree = ast.parse(code)
|
|
164
|
+
last_node = tree.body[-1] if tree.body else None
|
|
165
|
+
|
|
166
|
+
# If the last node is an expression, modify the AST to capture the result
|
|
167
|
+
if isinstance(last_node, ast.Expr):
|
|
168
|
+
tgt = [ast.Name(id='_result', ctx=ast.Store())]
|
|
169
|
+
assign_node = ast.Assign(targets=tgt, value=last_node.value)
|
|
170
|
+
tree.body[-1] = _copy_loc(assign_node, last_node)
|
|
171
|
+
|
|
172
|
+
compiled_code = compile(tree, filename='<ast>', mode='exec')
|
|
173
|
+
glb = glb or {}
|
|
174
|
+
stdout_buffer = io.StringIO()
|
|
175
|
+
saved_stdout = sys.stdout
|
|
176
|
+
sys.stdout = stdout_buffer
|
|
177
|
+
try: exec(compiled_code, glb, loc)
|
|
178
|
+
finally: sys.stdout = saved_stdout
|
|
179
|
+
_result = glb.get('_result', None)
|
|
180
|
+
if _result is not None: return _result
|
|
181
|
+
return stdout_buffer.getvalue().strip()
|
|
182
|
+
|
|
183
|
+
# %% ../01_funccall.ipynb
|
|
184
|
+
def python(
    code:str, # Code to execute
    glb:Optional[dict]=None, # Globals namespace
    loc:Optional[dict]=None, # Locals namespace
    timeout:int=3600 # Maximum run time in seconds
):
    "Executes python `code` with `timeout` and returning final expression (similar to IPython)."
    # NOTE(review): `code` is executed with `exec` via `_run`; only pass code from a source you trust
    def handler(*args): raise TimeoutError()
    # Default to the caller's module globals
    if glb is None: glb = inspect.currentframe().f_back.f_globals
    if loc is None: loc=glb
    prev = signal.signal(signal.SIGALRM, handler)
    signal.alarm(timeout)
    try: return _run(code, glb, loc)
    # Errors, including the timeout, are returned as a traceback string rather than raised
    except Exception: return traceback.format_exc()
    finally:
        signal.alarm(0)
        # Put back the handler that was installed before this call (`None` means it was not set from Python)
        if prev is not None: signal.signal(signal.SIGALRM, prev)
|
|
199
|
+
|
|
200
|
+
# %% ../01_funccall.ipynb
|
|
201
|
+
def mk_ns(fs):
    "Namespace dict from `fs`: a mapping is returned as-is; otherwise dicts are merged and named callables are added under their `__name__`"
    if isinstance(fs, abc.Mapping): return fs
    ns = {}
    for o in listify(fs):
        if isinstance(o, dict): ns.update(o)
        elif callable(o) and hasattr(o, '__name__'): ns[o.__name__] = o
    return ns
|
|
208
|
+
|
|
209
|
+
# %% ../01_funccall.ipynb
|
|
210
|
+
def _coerce_inputs(func, inputs):
    "Return a copy of `inputs` with values converted according to the type hints of `func`"
    hints = get_type_hints(func) if hasattr(func, '__annotations__') else {}
    def _coerce(k, v):
        ann = hints.get(k)
        # Custom types are built by calling the type on the value
        if ann in custom_types: return ann(v)
        # A dict value for a callable annotation is expanded into keyword arguments
        if isinstance(v, dict) and callable(ann): return ann(**v)
        return v
    return {k: _coerce(k, v) for k,v in inputs.items()}
|
|
220
|
+
|
|
221
|
+
# %% ../01_funccall.ipynb
|
|
222
|
+
def call_func(fc_name, fc_inputs, ns, raise_on_err=True):
    "Call the function `fc_name` with the given `fc_inputs` using namespace `ns`; on error re-raise, or return the traceback text if `raise_on_err` is false"
    if not isinstance(ns, abc.Mapping): ns = mk_ns(ns)
    func = ns[fc_name]
    # Drop any non-word characters from the argument names before binding them
    cleaned = {re.sub(r'\W', '', k): v for k,v in fc_inputs.items()}
    kwargs = _coerce_inputs(func, cleaned)
    try: return func(**kwargs)
    except Exception as e:
        if not raise_on_err: return traceback.format_exc()
        raise e from None
|
|
232
|
+
|
|
233
|
+
# %% ../01_funccall.ipynb
|
|
234
|
+
async def call_func_async(fc_name, fc_inputs, ns, raise_on_err=True):
    "Like `call_func`, but if the call returns a coroutine it is awaited, with the same error handling"
    res = call_func(fc_name, fc_inputs, ns, raise_on_err=raise_on_err)
    if not inspect.iscoroutine(res): return res
    try: return await res
    except Exception as e:
        if not raise_on_err: return traceback.format_exc()
        raise e from None
|
|
243
|
+
|
|
244
|
+
# %% ../01_funccall.ipynb
|
|
245
|
+
def mk_param(nm, props, req):
    "Create a `Parameter` for `nm` with schema `props`; names in `req` are positional-or-keyword without default, others keyword-only"
    required = nm in req
    # NOTE(review): `type_map` is not defined in this module's own code; presumably it arrives via
    # `from fastcore.utils import *` -- confirm it is keyed by JSON schema type names
    def _py_type(schema_type): return type_map.get(schema_type, Any)
    if props.get('type') == 'array' and 'items' in props: anno = list[_py_type(props['items'].get('type'))]
    else: anno = _py_type(props.get('type'))
    return Parameter(nm,
                     Parameter.POSITIONAL_OR_KEYWORD if required else Parameter.KEYWORD_ONLY,
                     default=Parameter.empty if required else props.get('default'),
                     annotation=anno)
|
|
254
|
+
|
|
255
|
+
# %% ../01_funccall.ipynb
|
|
256
|
+
def schema2sig(tool):
    "Build a `Signature` from the `inputSchema` of json schema `tool`"
    schema = tool.inputSchema
    props = schema['properties']
    req = schema.get('required', [])
    params = [mk_param(k, v, req) for k, v in props.items()]
    # Stable sort by kind so positional-or-keyword (required) params precede keyword-only ones
    params.sort(key=lambda p: p.kind)
    return Signature(params)
|
|
261
|
+
|
|
262
|
+
# %% ../01_funccall.ipynb
|
|
263
|
+
def mk_tool(dispfn, tool):
    "Create a callable function from a JSON schema tool definition; calls are forwarded to `dispfn(tool.name, **arguments)`"
    sig = schema2sig(tool)
    def fn(*args, **kwargs):
        # Bind against the schema-derived signature so positional args are turned into named ones
        bound = sig.bind(*args, **kwargs)
        return dispfn(tool.name, **bound.arguments)
    # Make the wrapper present the tool's name, docs and signature
    fn.__doc__ = tool.description
    fn.__signature__ = sig
    fn.__name__ = fn.__qualname__ = tool.name
    fn.__annotations__ = {k: p.annotation for k, p in sig.parameters.items()}
    return fn
|