toolslm 0.3.16__tar.gz → 0.3.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {toolslm-0.3.16/toolslm.egg-info → toolslm-0.3.18}/PKG-INFO +1 -1
- {toolslm-0.3.16 → toolslm-0.3.18}/settings.ini +1 -1
- toolslm-0.3.18/toolslm/__init__.py +1 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/toolslm/_modidx.py +2 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/toolslm/xml.py +42 -14
- {toolslm-0.3.16 → toolslm-0.3.18/toolslm.egg-info}/PKG-INFO +1 -1
- toolslm-0.3.16/toolslm/__init__.py +0 -1
- {toolslm-0.3.16 → toolslm-0.3.18}/LICENSE +0 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/MANIFEST.in +0 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/README.md +0 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/pyproject.toml +0 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/setup.cfg +0 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/setup.py +0 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/toolslm/download.py +0 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/toolslm/funccall.py +0 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/toolslm/md_hier.py +0 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/toolslm/shell.py +0 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/toolslm.egg-info/SOURCES.txt +0 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/toolslm.egg-info/dependency_links.txt +0 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/toolslm.egg-info/entry_points.txt +0 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/toolslm.egg-info/not-zip-safe +0 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/toolslm.egg-info/requires.txt +0 -0
- {toolslm-0.3.16 → toolslm-0.3.18}/toolslm.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.3.18"
|
|
@@ -44,12 +44,14 @@ d = { 'settings': { 'branch': 'main',
|
|
|
44
44
|
'toolslm.xml.files2ctx': ('xml.html#files2ctx', 'toolslm/xml.py'),
|
|
45
45
|
'toolslm.xml.folder2ctx': ('xml.html#folder2ctx', 'toolslm/xml.py'),
|
|
46
46
|
'toolslm.xml.folder2ctx_cli': ('xml.html#folder2ctx_cli', 'toolslm/xml.py'),
|
|
47
|
+
'toolslm.xml.get_docstring': ('xml.html#get_docstring', 'toolslm/xml.py'),
|
|
47
48
|
'toolslm.xml.get_mime_text': ('xml.html#get_mime_text', 'toolslm/xml.py'),
|
|
48
49
|
'toolslm.xml.json_to_xml': ('xml.html#json_to_xml', 'toolslm/xml.py'),
|
|
49
50
|
'toolslm.xml.mk_doc': ('xml.html#mk_doc', 'toolslm/xml.py'),
|
|
50
51
|
'toolslm.xml.mk_doctype': ('xml.html#mk_doctype', 'toolslm/xml.py'),
|
|
51
52
|
'toolslm.xml.nb2xml': ('xml.html#nb2xml', 'toolslm/xml.py'),
|
|
52
53
|
'toolslm.xml.parse_gh_url': ('xml.html#parse_gh_url', 'toolslm/xml.py'),
|
|
54
|
+
'toolslm.xml.py2sigs': ('xml.html#py2sigs', 'toolslm/xml.py'),
|
|
53
55
|
'toolslm.xml.read_file': ('xml.html#read_file', 'toolslm/xml.py'),
|
|
54
56
|
'toolslm.xml.repo2ctx': ('xml.html#repo2ctx', 'toolslm/xml.py'),
|
|
55
57
|
'toolslm.xml.sym2file': ('xml.html#sym2file', 'toolslm/xml.py'),
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../00_xml.ipynb.
|
|
2
2
|
|
|
3
3
|
# %% auto 0
|
|
4
|
-
__all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', 'nb2xml', 'mk_doctype', 'mk_doc', 'docs_xml',
|
|
5
|
-
'read_file', 'files2ctx', 'folder2ctx', 'sym2file', 'sym2folderctx', 'sym2pkgpath', 'sym2pkgctx',
|
|
6
|
-
'folder2ctx_cli', 'parse_gh_url', 'repo2ctx']
|
|
4
|
+
__all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', 'nb2xml', 'get_docstring', 'py2sigs', 'mk_doctype',
|
|
5
|
+
'mk_doc', 'docs_xml', 'read_file', 'files2ctx', 'folder2ctx', 'sym2file', 'sym2folderctx', 'sym2pkgpath',
|
|
6
|
+
'sym2pkgctx', 'folder2ctx_cli', 'parse_gh_url', 'repo2ctx']
|
|
7
7
|
|
|
8
8
|
# %% ../00_xml.ipynb
|
|
9
|
-
import hashlib, inspect, xml.etree.ElementTree as ET
|
|
9
|
+
import hashlib, inspect, xml.etree.ElementTree as ET, ast
|
|
10
10
|
from collections import namedtuple
|
|
11
11
|
from ghapi.all import GhApi
|
|
12
12
|
|
|
@@ -50,24 +50,47 @@ def cell2out(o):
|
|
|
50
50
|
if hasattr(o, 'ename'): return Out(f"{o.ename}: {o.evalue}", type='error')
|
|
51
51
|
|
|
52
52
|
# %% ../00_xml.ipynb
|
|
53
|
-
def cell2xml(cell, out=True):
|
|
53
|
+
def cell2xml(cell, out=True, ids=True):
|
|
54
54
|
"Convert notebook cell to concise XML format"
|
|
55
55
|
src = ''.join(getattr(cell, 'source', ''))
|
|
56
56
|
f = Code if cell.cell_type=='code' else Md
|
|
57
|
-
if not out: return f(src)
|
|
57
|
+
kw = dict(id=cell.id) if ids and hasattr(cell, 'id') else {}
|
|
58
|
+
if not out: return f(src, **kw)
|
|
58
59
|
parts = [Source(src)]
|
|
59
60
|
out_items = L(getattr(cell,'outputs',[])).map(cell2out).filter()
|
|
60
61
|
if out_items: parts.append(Outs(*out_items))
|
|
61
|
-
return f(*parts)
|
|
62
|
+
return f(*parts, **kw)
|
|
62
63
|
|
|
63
64
|
# %% ../00_xml.ipynb
|
|
64
|
-
def nb2xml(fname=None, nb=None, out=True):
|
|
65
|
+
def nb2xml(fname=None, nb=None, out=True, ids=True):
|
|
65
66
|
"Convert notebook to XML format"
|
|
66
67
|
assert bool(fname)^bool(nb), "Pass either `fname` or `nb`"
|
|
67
68
|
if not nb: nb = dict2obj(fname.read_json())
|
|
68
|
-
cells_xml = [to_xml(cell2xml(c, out=out), do_escape=False) for c in nb.cells if c.cell_type in ('code','markdown')]
|
|
69
|
+
cells_xml = [to_xml(cell2xml(c, out=out, ids=ids), do_escape=False) for c in nb.cells if c.cell_type in ('code','markdown')]
|
|
69
70
|
return to_xml(Notebook(*cells_xml), do_escape=False)
|
|
70
71
|
|
|
72
|
+
# %% ../00_xml.ipynb
|
|
73
|
+
def get_docstring(node, lines):
|
|
74
|
+
"Get docstring from source lines if present"
|
|
75
|
+
if not (node.body and isinstance(node.body[0], ast.Expr) and isinstance(node.body[0].value, ast.Constant)): return None
|
|
76
|
+
doc_node = node.body[0]
|
|
77
|
+
return '\n'.join(lines[doc_node.lineno-1:doc_node.end_lineno])
|
|
78
|
+
|
|
79
|
+
def py2sigs(fname=None, src=None):
|
|
80
|
+
"Return signature+docstring text for all functions and class methods in source"
|
|
81
|
+
if fname: src = Path(fname).read_text()
|
|
82
|
+
tree = ast.parse(src)
|
|
83
|
+
lines = src.splitlines()
|
|
84
|
+
res = []
|
|
85
|
+
for node in ast.walk(tree):
|
|
86
|
+
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
87
|
+
body_start = max(node.body[0].lineno - 1, node.lineno)
|
|
88
|
+
sig = '\n'.join(lines[node.lineno-1:body_start])
|
|
89
|
+
doc = get_docstring(node, lines)
|
|
90
|
+
cts = f"{sig}\n{doc}" if doc else sig
|
|
91
|
+
res.append(cts.strip('\r\n'))
|
|
92
|
+
return '\n\n'.join(res)
|
|
93
|
+
|
|
71
94
|
# %% ../00_xml.ipynb
|
|
72
95
|
doctype = namedtuple('doctype', ['src', 'content'])
|
|
73
96
|
|
|
@@ -110,15 +133,16 @@ def docs_xml(docs:list[str], # The content of each document
|
|
|
110
133
|
pre = 'Here are some documents for you to reference for your task:\n\n' if prefix else ''
|
|
111
134
|
if srcs is None: srcs = [None]*len(docs)
|
|
112
135
|
if details is None: details = [{}]*len(docs)
|
|
113
|
-
docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)))
|
|
136
|
+
docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)) if d.strip())
|
|
114
137
|
kw = dict(title=title) if title else {}
|
|
115
138
|
return pre + to_xml(Documents(*docs, **kw), do_escape=False)
|
|
116
139
|
|
|
117
140
|
# %% ../00_xml.ipynb
|
|
118
|
-
def read_file(fname, out=True, max_size=None):
|
|
141
|
+
def read_file(fname, out=True, max_size=None, ids=True, sigs_only=False):
|
|
119
142
|
"Read file content, converting notebooks to XML if needed"
|
|
120
143
|
fname = Path(fname)
|
|
121
|
-
if fname.suffix == '.ipynb': res = nb2xml(fname, out=out)
|
|
144
|
+
if fname.suffix == '.ipynb': res = nb2xml(fname, out=out, ids=ids)
|
|
145
|
+
elif fname.suffix == '.py' and sigs_only: res = py2sigs(fname)
|
|
122
146
|
else: res = fname.read_text()
|
|
123
147
|
if max_size and len(res)>max_size: return f"[Skipped: {fname.name} exceeds {max_size} bytes]"
|
|
124
148
|
return res
|
|
@@ -130,11 +154,13 @@ def files2ctx(
|
|
|
130
154
|
out:bool=True, # Include notebook cell outputs?
|
|
131
155
|
srcs:Optional[list]=None, # Use the labels instead of `fnames`
|
|
132
156
|
max_size:int=None, # Skip files larger than this (bytes)
|
|
157
|
+
ids:bool=True, # Include cell ids in notebooks?
|
|
158
|
+
sigs_only:bool=False, # For .py files, only include signatures and docstrings
|
|
133
159
|
**kwargs
|
|
134
160
|
)->str: # XML for LM context
|
|
135
161
|
"Convert files to XML context, handling notebooks"
|
|
136
162
|
fnames = [Path(o) for o in fnames]
|
|
137
|
-
contents = [read_file(o, out=out, max_size=max_size) for o in fnames]
|
|
163
|
+
contents = [read_file(o, out=out, max_size=max_size, ids=ids, sigs_only=sigs_only) for o in fnames]
|
|
138
164
|
return docs_xml(contents, srcs or fnames, **kwargs)
|
|
139
165
|
|
|
140
166
|
# %% ../00_xml.ipynb
|
|
@@ -149,6 +175,8 @@ def folder2ctx(
|
|
|
149
175
|
max_total:int=10_000_000, # Max total output size in bytes
|
|
150
176
|
readme_first:bool=False, # Prioritize README files at start of context?
|
|
151
177
|
files_only:bool=False, # Return dict of {filename: size} instead of context?
|
|
178
|
+
sigs_only:bool=False, # Return signatures instead of full text for python files?
|
|
179
|
+
ids:bool=True, # Include cell ids in notebooks?
|
|
152
180
|
**kwargs
|
|
153
181
|
)->Union[str,dict]:
|
|
154
182
|
"Convert folder contents to XML context, handling notebooks"
|
|
@@ -157,7 +185,7 @@ def folder2ctx(
|
|
|
157
185
|
if files_only: return {str(f.relative_to(folder)): f.stat().st_size for f in fnames}
|
|
158
186
|
if readme_first: fnames = sorted(fnames, key=lambda f: (0 if 'readme' in f.name.lower() else 1, f))
|
|
159
187
|
srcs = fnames if include_base else [f.relative_to(folder) for f in fnames]
|
|
160
|
-
res = files2ctx(fnames, prefix=prefix, out=out, srcs=srcs, title=title, max_size=max_size)
|
|
188
|
+
res = files2ctx(fnames, prefix=prefix, out=out, srcs=srcs, title=title, max_size=max_size, sigs_only=sigs_only, ids=ids)
|
|
161
189
|
suf = f"\n\n[TRUNCATED: output size {{_outsz_}} exceeded max size {max_total} bytes]"
|
|
162
190
|
if max_total and len(res) > max_total: res = truncstr(res, max_total, suf=suf, sizevar='_outsz_')
|
|
163
191
|
return res
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.3.16"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|