toolslm 0.3.17__tar.gz → 0.3.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {toolslm-0.3.17/toolslm.egg-info → toolslm-0.3.19}/PKG-INFO +1 -1
- {toolslm-0.3.17 → toolslm-0.3.19}/settings.ini +1 -1
- toolslm-0.3.19/toolslm/__init__.py +1 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/toolslm/_modidx.py +3 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/toolslm/xml.py +55 -19
- {toolslm-0.3.17 → toolslm-0.3.19/toolslm.egg-info}/PKG-INFO +1 -1
- toolslm-0.3.17/toolslm/__init__.py +0 -1
- {toolslm-0.3.17 → toolslm-0.3.19}/LICENSE +0 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/MANIFEST.in +0 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/README.md +0 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/pyproject.toml +0 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/setup.cfg +0 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/setup.py +0 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/toolslm/download.py +0 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/toolslm/funccall.py +0 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/toolslm/md_hier.py +0 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/toolslm/shell.py +0 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/toolslm.egg-info/SOURCES.txt +0 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/toolslm.egg-info/dependency_links.txt +0 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/toolslm.egg-info/entry_points.txt +0 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/toolslm.egg-info/not-zip-safe +0 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/toolslm.egg-info/requires.txt +0 -0
- {toolslm-0.3.17 → toolslm-0.3.19}/toolslm.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.3.19"
|
|
@@ -40,16 +40,19 @@ d = { 'settings': { 'branch': 'main',
|
|
|
40
40
|
'toolslm.xml': { 'toolslm.xml._add_nls': ('xml.html#_add_nls', 'toolslm/xml.py'),
|
|
41
41
|
'toolslm.xml.cell2out': ('xml.html#cell2out', 'toolslm/xml.py'),
|
|
42
42
|
'toolslm.xml.cell2xml': ('xml.html#cell2xml', 'toolslm/xml.py'),
|
|
43
|
+
'toolslm.xml.cells2xml': ('xml.html#cells2xml', 'toolslm/xml.py'),
|
|
43
44
|
'toolslm.xml.docs_xml': ('xml.html#docs_xml', 'toolslm/xml.py'),
|
|
44
45
|
'toolslm.xml.files2ctx': ('xml.html#files2ctx', 'toolslm/xml.py'),
|
|
45
46
|
'toolslm.xml.folder2ctx': ('xml.html#folder2ctx', 'toolslm/xml.py'),
|
|
46
47
|
'toolslm.xml.folder2ctx_cli': ('xml.html#folder2ctx_cli', 'toolslm/xml.py'),
|
|
48
|
+
'toolslm.xml.get_docstring': ('xml.html#get_docstring', 'toolslm/xml.py'),
|
|
47
49
|
'toolslm.xml.get_mime_text': ('xml.html#get_mime_text', 'toolslm/xml.py'),
|
|
48
50
|
'toolslm.xml.json_to_xml': ('xml.html#json_to_xml', 'toolslm/xml.py'),
|
|
49
51
|
'toolslm.xml.mk_doc': ('xml.html#mk_doc', 'toolslm/xml.py'),
|
|
50
52
|
'toolslm.xml.mk_doctype': ('xml.html#mk_doctype', 'toolslm/xml.py'),
|
|
51
53
|
'toolslm.xml.nb2xml': ('xml.html#nb2xml', 'toolslm/xml.py'),
|
|
52
54
|
'toolslm.xml.parse_gh_url': ('xml.html#parse_gh_url', 'toolslm/xml.py'),
|
|
55
|
+
'toolslm.xml.py2sigs': ('xml.html#py2sigs', 'toolslm/xml.py'),
|
|
53
56
|
'toolslm.xml.read_file': ('xml.html#read_file', 'toolslm/xml.py'),
|
|
54
57
|
'toolslm.xml.repo2ctx': ('xml.html#repo2ctx', 'toolslm/xml.py'),
|
|
55
58
|
'toolslm.xml.sym2file': ('xml.html#sym2file', 'toolslm/xml.py'),
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../00_xml.ipynb.
|
|
2
2
|
|
|
3
3
|
# %% auto 0
|
|
4
|
-
__all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', '
|
|
5
|
-
'
|
|
6
|
-
'folder2ctx_cli', 'parse_gh_url', 'repo2ctx']
|
|
4
|
+
__all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', 'cells2xml', 'nb2xml', 'get_docstring', 'py2sigs',
|
|
5
|
+
'mk_doctype', 'mk_doc', 'docs_xml', 'read_file', 'files2ctx', 'folder2ctx', 'sym2file', 'sym2folderctx',
|
|
6
|
+
'sym2pkgpath', 'sym2pkgctx', 'folder2ctx_cli', 'parse_gh_url', 'repo2ctx']
|
|
7
7
|
|
|
8
8
|
# %% ../00_xml.ipynb
|
|
9
|
-
import hashlib, inspect, xml.etree.ElementTree as ET
|
|
9
|
+
import hashlib, inspect, xml.etree.ElementTree as ET, ast
|
|
10
10
|
from collections import namedtuple
|
|
11
11
|
from ghapi.all import GhApi
|
|
12
12
|
|
|
13
13
|
from fastcore.utils import *
|
|
14
14
|
from fastcore.meta import delegates
|
|
15
15
|
from fastcore.xtras import hl_md
|
|
16
|
-
from fastcore.xml import to_xml, Document, Documents, Document_content, Src, Source,Out,Outs,Cell,Notebook,Md,Code
|
|
16
|
+
from fastcore.xml import to_xml, Document, Documents, Document_content, Src, Source,Out,Outs,Cell,Notebook,Md,Code,Raw
|
|
17
17
|
from fastcore.script import call_parse
|
|
18
18
|
|
|
19
19
|
# %% ../00_xml.ipynb
|
|
@@ -50,10 +50,13 @@ def cell2out(o):
|
|
|
50
50
|
if hasattr(o, 'ename'): return Out(f"{o.ename}: {o.evalue}", type='error')
|
|
51
51
|
|
|
52
52
|
# %% ../00_xml.ipynb
|
|
53
|
-
def cell2xml(cell, out=True, ids=True):
|
|
53
|
+
_ctfuns = {'code': Code, 'markdown': Md, 'raw': Raw}
|
|
54
|
+
|
|
55
|
+
def cell2xml(cell, out=True, ids=True, nums=False):
|
|
54
56
|
"Convert notebook cell to concise XML format"
|
|
55
57
|
src = ''.join(getattr(cell, 'source', ''))
|
|
56
|
-
|
|
58
|
+
if nums: src = '\n'.join(f'{i+1:6d} │ {l}' for i,l in enumerate(src.splitlines()))
|
|
59
|
+
f = _ctfuns[cell.cell_type]
|
|
57
60
|
kw = dict(id=cell.id) if ids and hasattr(cell, 'id') else {}
|
|
58
61
|
if not out: return f(src, **kw)
|
|
59
62
|
parts = [Source(src)]
|
|
@@ -62,12 +65,40 @@ def cell2xml(cell, out=True, ids=True):
|
|
|
62
65
|
return f(*parts, **kw)
|
|
63
66
|
|
|
64
67
|
# %% ../00_xml.ipynb
|
|
65
|
-
|
|
68
|
+
@delegates(cell2xml)
|
|
69
|
+
def cells2xml(cells, wrap=Notebook, **kwargs):
|
|
70
|
+
"Convert notebook to XML format"
|
|
71
|
+
res = [cell2xml(c, **kwargs) for c in cells]
|
|
72
|
+
return to_xml(wrap(*res), do_escape=False)
|
|
73
|
+
|
|
74
|
+
@delegates(cell2xml)
|
|
75
|
+
def nb2xml(fname=None, nb=None, **kwargs):
|
|
66
76
|
"Convert notebook to XML format"
|
|
67
77
|
assert bool(fname)^bool(nb), "Pass either `fname` or `nb`"
|
|
68
78
|
if not nb: nb = dict2obj(fname.read_json())
|
|
69
|
-
|
|
70
|
-
|
|
79
|
+
return cells2xml(nb.cells, **kwargs)
|
|
80
|
+
|
|
81
|
+
# %% ../00_xml.ipynb
|
|
82
|
+
def get_docstring(node, lines):
|
|
83
|
+
"Get docstring from source lines if present"
|
|
84
|
+
if not (node.body and isinstance(node.body[0], ast.Expr) and isinstance(node.body[0].value, ast.Constant)): return None
|
|
85
|
+
doc_node = node.body[0]
|
|
86
|
+
return '\n'.join(lines[doc_node.lineno-1:doc_node.end_lineno])
|
|
87
|
+
|
|
88
|
+
def py2sigs(fname=None, src=None):
|
|
89
|
+
"Return signature+docstring text for all functions and class methods in source"
|
|
90
|
+
if fname: src = Path(fname).expanduser().read_text()
|
|
91
|
+
tree = ast.parse(src)
|
|
92
|
+
lines = src.splitlines()
|
|
93
|
+
res = []
|
|
94
|
+
for node in ast.walk(tree):
|
|
95
|
+
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
96
|
+
body_start = max(node.body[0].lineno - 1, node.lineno)
|
|
97
|
+
sig = '\n'.join(lines[node.lineno-1:body_start])
|
|
98
|
+
doc = get_docstring(node, lines)
|
|
99
|
+
cts = f"{sig}\n{doc}" if doc else sig
|
|
100
|
+
res.append(cts.strip('\r\n'))
|
|
101
|
+
return '\n\n'.join(res)
|
|
71
102
|
|
|
72
103
|
# %% ../00_xml.ipynb
|
|
73
104
|
doctype = namedtuple('doctype', ['src', 'content'])
|
|
@@ -111,15 +142,17 @@ def docs_xml(docs:list[str], # The content of each document
|
|
|
111
142
|
pre = 'Here are some documents for you to reference for your task:\n\n' if prefix else ''
|
|
112
143
|
if srcs is None: srcs = [None]*len(docs)
|
|
113
144
|
if details is None: details = [{}]*len(docs)
|
|
114
|
-
docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)))
|
|
145
|
+
docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)) if d.strip())
|
|
115
146
|
kw = dict(title=title) if title else {}
|
|
116
147
|
return pre + to_xml(Documents(*docs, **kw), do_escape=False)
|
|
117
148
|
|
|
118
149
|
# %% ../00_xml.ipynb
|
|
119
|
-
def read_file(fname, out=True, max_size=None, ids=True):
|
|
150
|
+
@delegates(nb2xml)
|
|
151
|
+
def read_file(fname, max_size=None, sigs_only=False, **kwargs):
|
|
120
152
|
"Read file content, converting notebooks to XML if needed"
|
|
121
|
-
fname = Path(fname)
|
|
122
|
-
if fname.suffix == '.ipynb': res = nb2xml(fname,
|
|
153
|
+
fname = Path(fname).expanduser()
|
|
154
|
+
if fname.suffix == '.ipynb': res = nb2xml(fname, **kwargs)
|
|
155
|
+
elif fname.suffix == '.py' and sigs_only: res = py2sigs(fname)
|
|
123
156
|
else: res = fname.read_text()
|
|
124
157
|
if max_size and len(res)>max_size: return f"[Skipped: {fname.name} exceeds {max_size} bytes]"
|
|
125
158
|
return res
|
|
@@ -128,15 +161,17 @@ def read_file(fname, out=True, max_size=None, ids=True):
|
|
|
128
161
|
@delegates(docs_xml)
|
|
129
162
|
def files2ctx(
|
|
130
163
|
fnames:list[Union[str,Path]], # List of file names to add to context
|
|
131
|
-
out:bool=True, # Include notebook cell outputs?
|
|
132
164
|
srcs:Optional[list]=None, # Use the labels instead of `fnames`
|
|
133
165
|
max_size:int=None, # Skip files larger than this (bytes)
|
|
166
|
+
out:bool=True, # Include notebook cell outputs?
|
|
134
167
|
ids:bool=True, # Include cell ids in notebooks?
|
|
168
|
+
nums:bool=False, # Include line numbers in notebook cell source?
|
|
169
|
+
sigs_only:bool=False, # For .py files, only include signatures and docstrings
|
|
135
170
|
**kwargs
|
|
136
171
|
)->str: # XML for LM context
|
|
137
172
|
"Convert files to XML context, handling notebooks"
|
|
138
|
-
fnames = [Path(o) for o in fnames]
|
|
139
|
-
contents = [read_file(o, out=out,
|
|
173
|
+
fnames = [Path(o).expanduser() for o in listify(fnames)]
|
|
174
|
+
contents = [read_file(o, max_size=max_size, out=out, ids=ids, sigs_only=sigs_only, nums=nums) for o in fnames]
|
|
140
175
|
return docs_xml(contents, srcs or fnames, **kwargs)
|
|
141
176
|
|
|
142
177
|
# %% ../00_xml.ipynb
|
|
@@ -151,16 +186,17 @@ def folder2ctx(
|
|
|
151
186
|
max_total:int=10_000_000, # Max total output size in bytes
|
|
152
187
|
readme_first:bool=False, # Prioritize README files at start of context?
|
|
153
188
|
files_only:bool=False, # Return dict of {filename: size} instead of context?
|
|
189
|
+
sigs_only:bool=False, # Return signatures instead of full text for python files?
|
|
154
190
|
ids:bool=True, # Include cell ids in notebooks?
|
|
155
191
|
**kwargs
|
|
156
192
|
)->Union[str,dict]:
|
|
157
193
|
"Convert folder contents to XML context, handling notebooks"
|
|
158
|
-
folder = Path(folder)
|
|
194
|
+
folder = Path(folder).expanduser()
|
|
159
195
|
fnames = pglob(folder, **kwargs)
|
|
160
196
|
if files_only: return {str(f.relative_to(folder)): f.stat().st_size for f in fnames}
|
|
161
197
|
if readme_first: fnames = sorted(fnames, key=lambda f: (0 if 'readme' in f.name.lower() else 1, f))
|
|
162
198
|
srcs = fnames if include_base else [f.relative_to(folder) for f in fnames]
|
|
163
|
-
res = files2ctx(fnames, prefix=prefix, out=out, srcs=srcs, title=title, max_size=max_size, ids=ids)
|
|
199
|
+
res = files2ctx(fnames, prefix=prefix, out=out, srcs=srcs, title=title, max_size=max_size, sigs_only=sigs_only, ids=ids)
|
|
164
200
|
suf = f"\n\n[TRUNCATED: output size {{_outsz_}} exceeded max size {max_total} bytes]"
|
|
165
201
|
if max_total and len(res) > max_total: res = truncstr(res, max_total, suf=suf, sizevar='_outsz_')
|
|
166
202
|
return res
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.3.17"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|