toolslm 0.3.9__py3-none-any.whl → 0.3.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toolslm/__init__.py +1 -1
- toolslm/_modidx.py +10 -1
- toolslm/download.py +1 -0
- toolslm/funccall.py +30 -12
- toolslm/xml.py +155 -47
- {toolslm-0.3.9.dist-info → toolslm-0.3.23.dist-info}/METADATA +3 -2
- toolslm-0.3.23.dist-info/RECORD +13 -0
- {toolslm-0.3.9.dist-info → toolslm-0.3.23.dist-info}/entry_points.txt +1 -0
- toolslm-0.3.9.dist-info/RECORD +0 -13
- {toolslm-0.3.9.dist-info → toolslm-0.3.23.dist-info}/WHEEL +0 -0
- {toolslm-0.3.9.dist-info → toolslm-0.3.23.dist-info}/licenses/LICENSE +0 -0
- {toolslm-0.3.9.dist-info → toolslm-0.3.23.dist-info}/top_level.txt +0 -0
toolslm/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.3.9"
|
|
1
|
+
__version__ = "0.3.23"
|
toolslm/_modidx.py
CHANGED
|
@@ -40,14 +40,23 @@ d = { 'settings': { 'branch': 'main',
|
|
|
40
40
|
'toolslm.xml': { 'toolslm.xml._add_nls': ('xml.html#_add_nls', 'toolslm/xml.py'),
|
|
41
41
|
'toolslm.xml.cell2out': ('xml.html#cell2out', 'toolslm/xml.py'),
|
|
42
42
|
'toolslm.xml.cell2xml': ('xml.html#cell2xml', 'toolslm/xml.py'),
|
|
43
|
+
'toolslm.xml.cells2xml': ('xml.html#cells2xml', 'toolslm/xml.py'),
|
|
43
44
|
'toolslm.xml.docs_xml': ('xml.html#docs_xml', 'toolslm/xml.py'),
|
|
44
45
|
'toolslm.xml.files2ctx': ('xml.html#files2ctx', 'toolslm/xml.py'),
|
|
45
46
|
'toolslm.xml.folder2ctx': ('xml.html#folder2ctx', 'toolslm/xml.py'),
|
|
46
47
|
'toolslm.xml.folder2ctx_cli': ('xml.html#folder2ctx_cli', 'toolslm/xml.py'),
|
|
48
|
+
'toolslm.xml.get_docstring': ('xml.html#get_docstring', 'toolslm/xml.py'),
|
|
47
49
|
'toolslm.xml.get_mime_text': ('xml.html#get_mime_text', 'toolslm/xml.py'),
|
|
48
50
|
'toolslm.xml.json_to_xml': ('xml.html#json_to_xml', 'toolslm/xml.py'),
|
|
49
51
|
'toolslm.xml.mk_doc': ('xml.html#mk_doc', 'toolslm/xml.py'),
|
|
50
52
|
'toolslm.xml.mk_doctype': ('xml.html#mk_doctype', 'toolslm/xml.py'),
|
|
51
53
|
'toolslm.xml.nb2xml': ('xml.html#nb2xml', 'toolslm/xml.py'),
|
|
54
|
+
'toolslm.xml.parse_gh_url': ('xml.html#parse_gh_url', 'toolslm/xml.py'),
|
|
55
|
+
'toolslm.xml.py2sigs': ('xml.html#py2sigs', 'toolslm/xml.py'),
|
|
52
56
|
'toolslm.xml.read_file': ('xml.html#read_file', 'toolslm/xml.py'),
|
|
53
|
-
'toolslm.xml.repo2ctx': ('xml.html#repo2ctx', 'toolslm/xml.py')
|
|
57
|
+
'toolslm.xml.repo2ctx': ('xml.html#repo2ctx', 'toolslm/xml.py'),
|
|
58
|
+
'toolslm.xml.repo2ctx_cli': ('xml.html#repo2ctx_cli', 'toolslm/xml.py'),
|
|
59
|
+
'toolslm.xml.sym2file': ('xml.html#sym2file', 'toolslm/xml.py'),
|
|
60
|
+
'toolslm.xml.sym2folderctx': ('xml.html#sym2folderctx', 'toolslm/xml.py'),
|
|
61
|
+
'toolslm.xml.sym2pkgctx': ('xml.html#sym2pkgctx', 'toolslm/xml.py'),
|
|
62
|
+
'toolslm.xml.sym2pkgpath': ('xml.html#sym2pkgpath', 'toolslm/xml.py')}}}
|
toolslm/download.py
CHANGED
toolslm/funccall.py
CHANGED
|
@@ -5,7 +5,7 @@ __all__ = ['empty', 'custom_types', 'get_schema', 'python', 'mk_ns', 'call_func'
|
|
|
5
5
|
'mk_tool']
|
|
6
6
|
|
|
7
7
|
# %% ../01_funccall.ipynb
|
|
8
|
-
import inspect, json
|
|
8
|
+
import inspect, json, ast
|
|
9
9
|
from collections import abc
|
|
10
10
|
from fastcore.utils import *
|
|
11
11
|
from fastcore.docments import docments
|
|
@@ -37,12 +37,20 @@ def _types(t:type)->tuple[str,Optional[str]]:
|
|
|
37
37
|
else: return tmap.get(t.__name__, "object"), None
|
|
38
38
|
|
|
39
39
|
# %% ../01_funccall.ipynb
|
|
40
|
-
def _param(
|
|
41
|
-
|
|
40
|
+
def _param(
|
|
41
|
+
name, # param name
|
|
42
|
+
info, # dict from docments
|
|
43
|
+
evalable=False): # stringify defaults that can't be literal_eval'd?
|
|
44
|
+
"json schema parameter given `name` and `info` from docments full dict"
|
|
42
45
|
paramt,itemt = _types(info.anno)
|
|
43
46
|
pschema = dict(type=paramt, description=info.docment or "")
|
|
44
47
|
if itemt: pschema["items"] = {"type": itemt}
|
|
45
|
-
if info.default is not empty:
|
|
48
|
+
if info.default is not empty:
|
|
49
|
+
if evalable:
|
|
50
|
+
try: ast.literal_eval(repr(info.default))
|
|
51
|
+
except: pschema["default"] = str(info.default)
|
|
52
|
+
else: pschema["default"] = info.default
|
|
53
|
+
else: pschema["default"] = info.default
|
|
46
54
|
return pschema
|
|
47
55
|
|
|
48
56
|
# %% ../01_funccall.ipynb
|
|
@@ -90,9 +98,9 @@ def _handle_container(origin, args, defs):
|
|
|
90
98
|
return None
|
|
91
99
|
|
|
92
100
|
# %% ../01_funccall.ipynb
|
|
93
|
-
def _process_property(name, obj, props, req, defs):
|
|
101
|
+
def _process_property(name, obj, props, req, defs, evalable=False):
|
|
94
102
|
"Process a single property of the schema"
|
|
95
|
-
p = _param(name, obj)
|
|
103
|
+
p = _param(name, obj, evalable=evalable)
|
|
96
104
|
props[name] = p
|
|
97
105
|
if obj.default is empty: req[name] = True
|
|
98
106
|
|
|
@@ -103,14 +111,14 @@ def _process_property(name, obj, props, req, defs):
|
|
|
103
111
|
p.update(_handle_type(obj.anno, defs))
|
|
104
112
|
|
|
105
113
|
# %% ../01_funccall.ipynb
|
|
106
|
-
def _get_nested_schema(obj):
|
|
114
|
+
def _get_nested_schema(obj, evalable=False, skip_hidden=False):
|
|
107
115
|
"Generate nested JSON schema for a class or function"
|
|
108
116
|
d = docments(obj, full=True)
|
|
109
117
|
props, req, defs = {}, {}, {}
|
|
110
118
|
|
|
111
119
|
for n, o in d.items():
|
|
112
|
-
if n != 'return' and n != 'self':
|
|
113
|
-
_process_property(n, o, props, req, defs)
|
|
120
|
+
if n != 'return' and n != 'self' and not (skip_hidden and n.startswith('_')):
|
|
121
|
+
_process_property(n, o, props, req, defs, evalable=evalable)
|
|
114
122
|
|
|
115
123
|
tkw = {}
|
|
116
124
|
if isinstance(obj, type): tkw['title']=obj.__name__
|
|
@@ -120,15 +128,25 @@ def _get_nested_schema(obj):
|
|
|
120
128
|
return schema
|
|
121
129
|
|
|
122
130
|
# %% ../01_funccall.ipynb
|
|
123
|
-
def get_schema(
|
|
131
|
+
def get_schema(
|
|
132
|
+
f:Union[callable,dict], # Function to get schema for
|
|
133
|
+
pname='input_schema', # Key name for parameters
|
|
134
|
+
evalable=False, # stringify defaults that can't be literal_eval'd?
|
|
135
|
+
skip_hidden=False # skip parameters starting with '_'?
|
|
136
|
+
)->dict: # {'name':..., 'description':..., pname:...}
|
|
124
137
|
"Generate JSON schema for a class, function, or method"
|
|
125
138
|
if isinstance(f, dict): return f
|
|
126
|
-
schema = _get_nested_schema(f)
|
|
139
|
+
schema = _get_nested_schema(f, evalable=evalable, skip_hidden=skip_hidden)
|
|
127
140
|
desc = f.__doc__
|
|
128
141
|
assert desc, "Docstring missing!"
|
|
129
142
|
d = docments(f, full=True)
|
|
130
143
|
ret = d.pop('return')
|
|
131
|
-
|
|
144
|
+
has_type = (ret.anno is not empty) and (ret.anno is not None)
|
|
145
|
+
has_doc = ret.docment
|
|
146
|
+
if has_type or has_doc:
|
|
147
|
+
type_str = f'type: {_types(ret.anno)[0]}' if has_type else None
|
|
148
|
+
ret_str = f'{ret.docment} ({type_str})' if has_type and has_doc else (type_str if has_type else ret.docment)
|
|
149
|
+
desc += f'\n\nReturns:\n- {ret_str}'
|
|
132
150
|
return {"name": f.__name__, "description": desc, pname: schema}
|
|
133
151
|
|
|
134
152
|
# %% ../01_funccall.ipynb
|
toolslm/xml.py
CHANGED
|
@@ -1,18 +1,19 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../00_xml.ipynb.
|
|
2
2
|
|
|
3
3
|
# %% auto 0
|
|
4
|
-
__all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', '
|
|
5
|
-
'read_file', 'files2ctx', 'folder2ctx', '
|
|
4
|
+
__all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', 'cells2xml', 'nb2xml', 'get_docstring', 'py2sigs',
|
|
5
|
+
'mk_doctype', 'mk_doc', 'docs_xml', 'read_file', 'files2ctx', 'folder2ctx', 'sym2file', 'sym2folderctx',
|
|
6
|
+
'sym2pkgpath', 'sym2pkgctx', 'folder2ctx_cli', 'parse_gh_url', 'repo2ctx', 'repo2ctx_cli']
|
|
6
7
|
|
|
7
8
|
# %% ../00_xml.ipynb
|
|
8
|
-
import hashlib,xml.etree.ElementTree as ET
|
|
9
|
+
import hashlib, inspect, xml.etree.ElementTree as ET, ast
|
|
9
10
|
from collections import namedtuple
|
|
10
11
|
from ghapi.all import GhApi
|
|
11
12
|
|
|
12
13
|
from fastcore.utils import *
|
|
13
14
|
from fastcore.meta import delegates
|
|
14
15
|
from fastcore.xtras import hl_md
|
|
15
|
-
from fastcore.xml import to_xml, Document, Documents, Document_content, Src, Source,Out,Outs,Cell,Notebook,Md,Code
|
|
16
|
+
from fastcore.xml import to_xml, Document, Documents, Document_content, Src, Source,Out,Outs,Cell,Notebook,Md,Code,Raw
|
|
16
17
|
from fastcore.script import call_parse
|
|
17
18
|
|
|
18
19
|
# %% ../00_xml.ipynb
|
|
@@ -49,23 +50,55 @@ def cell2out(o):
|
|
|
49
50
|
if hasattr(o, 'ename'): return Out(f"{o.ename}: {o.evalue}", type='error')
|
|
50
51
|
|
|
51
52
|
# %% ../00_xml.ipynb
|
|
52
|
-
|
|
53
|
+
_ctfuns = {'code': Code, 'markdown': Md, 'raw': Raw}
|
|
54
|
+
|
|
55
|
+
def cell2xml(cell, out=True, ids=True, nums=False):
|
|
53
56
|
"Convert notebook cell to concise XML format"
|
|
54
57
|
src = ''.join(getattr(cell, 'source', ''))
|
|
55
|
-
|
|
56
|
-
|
|
58
|
+
if nums: src = '\n'.join(f'{i+1:6d} │ {l}' for i,l in enumerate(src.splitlines()))
|
|
59
|
+
f = _ctfuns[cell.cell_type]
|
|
60
|
+
kw = dict(id=cell.id) if ids and hasattr(cell, 'id') else {}
|
|
61
|
+
if not out: return f(src, **kw)
|
|
57
62
|
parts = [Source(src)]
|
|
58
63
|
out_items = L(getattr(cell,'outputs',[])).map(cell2out).filter()
|
|
59
64
|
if out_items: parts.append(Outs(*out_items))
|
|
60
|
-
return f(*parts)
|
|
65
|
+
return f(*parts, **kw)
|
|
61
66
|
|
|
62
67
|
# %% ../00_xml.ipynb
|
|
63
|
-
|
|
68
|
+
@delegates(cell2xml)
|
|
69
|
+
def cells2xml(cells, wrap=Notebook, **kwargs):
|
|
70
|
+
"Convert notebook to XML format"
|
|
71
|
+
res = [cell2xml(c, **kwargs) for c in cells]
|
|
72
|
+
return to_xml(wrap(*res), do_escape=False)
|
|
73
|
+
|
|
74
|
+
@delegates(cell2xml)
|
|
75
|
+
def nb2xml(fname=None, nb=None, **kwargs):
|
|
64
76
|
"Convert notebook to XML format"
|
|
65
77
|
assert bool(fname)^bool(nb), "Pass either `fname` or `nb`"
|
|
66
78
|
if not nb: nb = dict2obj(fname.read_json())
|
|
67
|
-
|
|
68
|
-
|
|
79
|
+
return cells2xml(nb.cells, **kwargs)
|
|
80
|
+
|
|
81
|
+
# %% ../00_xml.ipynb
|
|
82
|
+
def get_docstring(node, lines):
|
|
83
|
+
"Get docstring from source lines if present"
|
|
84
|
+
if not (node.body and isinstance(node.body[0], ast.Expr) and isinstance(node.body[0].value, ast.Constant)): return None
|
|
85
|
+
doc_node = node.body[0]
|
|
86
|
+
return '\n'.join(lines[doc_node.lineno-1:doc_node.end_lineno])
|
|
87
|
+
|
|
88
|
+
def py2sigs(fname=None, src=None):
|
|
89
|
+
"Return signature+docstring text for all functions and class methods in source"
|
|
90
|
+
if fname: src = Path(fname).expanduser().read_text()
|
|
91
|
+
tree = ast.parse(src)
|
|
92
|
+
lines = src.splitlines()
|
|
93
|
+
res = []
|
|
94
|
+
for node in ast.walk(tree):
|
|
95
|
+
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
96
|
+
body_start = max(node.body[0].lineno - 1, node.lineno)
|
|
97
|
+
sig = '\n'.join(lines[node.lineno-1:body_start])
|
|
98
|
+
doc = get_docstring(node, lines)
|
|
99
|
+
cts = f"{sig}\n{doc}" if doc else sig
|
|
100
|
+
res.append(cts.strip('\r\n'))
|
|
101
|
+
return '\n\n'.join(res)
|
|
69
102
|
|
|
70
103
|
# %% ../00_xml.ipynb
|
|
71
104
|
doctype = namedtuple('doctype', ['src', 'content'])
|
|
@@ -101,7 +134,7 @@ def mk_doc(index:int, # The document index
|
|
|
101
134
|
# %% ../00_xml.ipynb
|
|
102
135
|
def docs_xml(docs:list[str], # The content of each document
|
|
103
136
|
srcs:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided
|
|
104
|
-
prefix:bool=
|
|
137
|
+
prefix:bool=False, # Include Anthropic's suggested prose intro?
|
|
105
138
|
details:Optional[list]=None, # Optional list of dicts with additional attrs for each doc
|
|
106
139
|
title:str=None # Optional title attr for Documents element
|
|
107
140
|
)->str:
|
|
@@ -109,80 +142,155 @@ def docs_xml(docs:list[str], # The content of each document
|
|
|
109
142
|
pre = 'Here are some documents for you to reference for your task:\n\n' if prefix else ''
|
|
110
143
|
if srcs is None: srcs = [None]*len(docs)
|
|
111
144
|
if details is None: details = [{}]*len(docs)
|
|
112
|
-
docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)))
|
|
145
|
+
docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)) if d.strip())
|
|
113
146
|
kw = dict(title=title) if title else {}
|
|
114
147
|
return pre + to_xml(Documents(*docs, **kw), do_escape=False)
|
|
115
148
|
|
|
116
149
|
# %% ../00_xml.ipynb
|
|
117
|
-
|
|
150
|
+
@delegates(nb2xml)
|
|
151
|
+
def read_file(fname, max_size=None, sigs_only=False, **kwargs):
|
|
118
152
|
"Read file content, converting notebooks to XML if needed"
|
|
119
|
-
fname = Path(fname)
|
|
120
|
-
if fname.suffix == '.ipynb': res = nb2xml(fname,
|
|
153
|
+
fname = Path(fname).expanduser()
|
|
154
|
+
if fname.suffix == '.ipynb': res = nb2xml(fname, **kwargs)
|
|
155
|
+
elif fname.suffix == '.py' and sigs_only: res = py2sigs(fname)
|
|
121
156
|
else: res = fname.read_text()
|
|
122
157
|
if max_size and len(res)>max_size: return f"[Skipped: {fname.name} exceeds {max_size} bytes]"
|
|
123
158
|
return res
|
|
124
159
|
|
|
125
160
|
# %% ../00_xml.ipynb
|
|
161
|
+
@delegates(docs_xml)
|
|
126
162
|
def files2ctx(
|
|
127
163
|
fnames:list[Union[str,Path]], # List of file names to add to context
|
|
128
|
-
prefix:bool=True, # Include Anthropic's suggested prose intro?
|
|
129
|
-
out:bool=True, # Include notebook cell outputs?
|
|
130
164
|
srcs:Optional[list]=None, # Use the labels instead of `fnames`
|
|
131
|
-
|
|
132
|
-
|
|
165
|
+
max_size:int=None, # Skip files larger than this (bytes)
|
|
166
|
+
out:bool=True, # Include notebook cell outputs?
|
|
167
|
+
ids:bool=True, # Include cell ids in notebooks?
|
|
168
|
+
nums:bool=False, # Include line numbers in notebook cell source?
|
|
169
|
+
sigs_only:bool=False, # For .py files, only include signatures and docstrings
|
|
170
|
+
**kwargs
|
|
133
171
|
)->str: # XML for LM context
|
|
134
172
|
"Convert files to XML context, handling notebooks"
|
|
135
|
-
fnames = [Path(o) for o in fnames]
|
|
136
|
-
contents = [read_file(o, out=out,
|
|
137
|
-
return docs_xml(contents, srcs or fnames,
|
|
173
|
+
fnames = [Path(o).expanduser() for o in listify(fnames)]
|
|
174
|
+
contents = [read_file(o, max_size=max_size, out=out, ids=ids, sigs_only=sigs_only, nums=nums) for o in fnames]
|
|
175
|
+
return docs_xml(contents, srcs or fnames, **kwargs)
|
|
138
176
|
|
|
139
177
|
# %% ../00_xml.ipynb
|
|
140
|
-
@delegates(globtastic)
|
|
178
|
+
@delegates(globtastic, but='func')
|
|
141
179
|
def folder2ctx(
|
|
142
|
-
|
|
143
|
-
prefix:bool=
|
|
180
|
+
path:Union[str,Path], # Folder to read
|
|
181
|
+
prefix:bool=False, # Include Anthropic's suggested prose intro?
|
|
144
182
|
out:bool=True, # Include notebook cell outputs?
|
|
145
183
|
include_base:bool=True, # Include full path in src?
|
|
146
184
|
title:str=None, # Optional title attr for Documents element
|
|
147
185
|
max_size:int=100_000, # Skip files larger than this (bytes)
|
|
186
|
+
max_total:int=10_000_000, # Max total output size in bytes
|
|
187
|
+
readme_first:bool=False, # Prioritize README files at start of context?
|
|
188
|
+
files_only:bool=False, # Return dict of {filename: size} instead of context?
|
|
189
|
+
sigs_only:bool=False, # Return signatures instead of full text for python files?
|
|
190
|
+
ids:bool=True, # Include cell ids in notebooks?
|
|
148
191
|
**kwargs
|
|
149
|
-
)->str:
|
|
192
|
+
)->Union[str,dict]:
|
|
150
193
|
"Convert folder contents to XML context, handling notebooks"
|
|
151
|
-
folder = Path(
|
|
152
|
-
fnames =
|
|
153
|
-
|
|
154
|
-
|
|
194
|
+
folder = Path(path).expanduser()
|
|
195
|
+
fnames = pglob(folder, **kwargs)
|
|
196
|
+
if files_only: return {str(f.relative_to(folder)): f.stat().st_size for f in fnames}
|
|
197
|
+
if readme_first: fnames = sorted(fnames, key=lambda f: (0 if 'readme' in f.name.lower() else 1, f))
|
|
198
|
+
srcs = fnames if include_base else [f.relative_to(folder) for f in fnames]
|
|
199
|
+
res = files2ctx(fnames, prefix=prefix, out=out, srcs=srcs, title=title, max_size=max_size, sigs_only=sigs_only, ids=ids)
|
|
200
|
+
suf = f"\n\n[TRUNCATED: output size {{_outsz_}} exceeded max size {max_total} bytes]"
|
|
201
|
+
if max_total and len(res) > max_total: res = truncstr(res, max_total, suf=suf, sizevar='_outsz_')
|
|
202
|
+
return res
|
|
203
|
+
|
|
204
|
+
# %% ../00_xml.ipynb
|
|
205
|
+
def sym2file(sym):
|
|
206
|
+
"Return md string with filepath and contents for a symbol's source file"
|
|
207
|
+
f = Path(inspect.getfile(sym))
|
|
208
|
+
return f"- `{f}`\n\n````\n{f.read_text()}\n````"
|
|
209
|
+
|
|
210
|
+
# %% ../00_xml.ipynb
|
|
211
|
+
@delegates(folder2ctx)
|
|
212
|
+
def sym2folderctx(
|
|
213
|
+
sym,
|
|
214
|
+
types:str|list='py', # list or comma-separated str of ext types from: py, js, java, c, cpp, rb, r, ex, sh, web, doc, cfg
|
|
215
|
+
skip_file_re=r'^_mod',
|
|
216
|
+
**kwargs):
|
|
217
|
+
"Return folder context for a symbol's source file location"
|
|
218
|
+
return folder2ctx(Path(inspect.getfile(sym)).parent, types=types, skip_file_re=skip_file_re, **kwargs)
|
|
219
|
+
|
|
220
|
+
# %% ../00_xml.ipynb
|
|
221
|
+
def sym2pkgpath(sym):
|
|
222
|
+
"Get root package path for a symbol"
|
|
223
|
+
root = sym.__module__.split('.')[0]
|
|
224
|
+
return Path(sys.modules[root].__path__[0])
|
|
225
|
+
|
|
226
|
+
# %% ../00_xml.ipynb
|
|
227
|
+
@delegates(folder2ctx)
|
|
228
|
+
def sym2pkgctx(sym, types:str|list='py', skip_file_re=r'^_mod', **kwargs):
|
|
229
|
+
"Return repo context for a symbol's root package"
|
|
230
|
+
return folder2ctx(sym2pkgpath(sym), types=types, skip_file_re=skip_file_re, **kwargs)
|
|
231
|
+
|
|
232
|
+
# %% ../00_xml.ipynb
|
|
233
|
+
@call_parse
|
|
234
|
+
@delegates(folder2ctx)
|
|
235
|
+
def folder2ctx_cli(
|
|
236
|
+
path:str='.', # Folder name containing files to add to context
|
|
237
|
+
out:bool=True, # Include notebook cell outputs?
|
|
238
|
+
**kwargs # Passed to `folder2ctx`
|
|
239
|
+
)->str: # XML for Claude context
|
|
240
|
+
"CLI to convert folder contents to XML context, handling notebooks"
|
|
241
|
+
print(folder2ctx(path, out=out, **kwargs))
|
|
242
|
+
|
|
243
|
+
# %% ../00_xml.ipynb
|
|
244
|
+
def parse_gh_url(url):
|
|
245
|
+
"Parse GitHub URL into (owner, repo, type, ref, path) or None"
|
|
246
|
+
m = re.match(r'https?://(?:www\.)?github\.com/([^/]+)/([^/]+)(?:/([^/]+)(?:/([^/]+)(?:/(.+))?)?)?', url)
|
|
247
|
+
return dict(zip('owner repo typ ref path'.split(), m.groups())) if m else None
|
|
155
248
|
|
|
156
249
|
# %% ../00_xml.ipynb
|
|
157
250
|
@delegates(folder2ctx)
|
|
158
251
|
def repo2ctx(
|
|
159
|
-
owner:str, # GitHub repo owner
|
|
160
|
-
repo:str, # GitHub repo name
|
|
161
|
-
ref:str=None, # Git ref (branch/tag/sha); defaults to repo's default branch
|
|
252
|
+
owner:str, # GitHub repo owner or "owner/repo" or a full github URL
|
|
253
|
+
repo:str=None, # GitHub repo name (leave empty if using "owner/repo" or URL format for owner param)
|
|
254
|
+
ref:str=None, # Git ref (branch/tag/sha) (get from URL not provided); defaults to repo's default branch
|
|
255
|
+
folder:str=None, # Only include files under this path (get from URL not provided)
|
|
256
|
+
show_filters:bool=True, # Include filter info in title?
|
|
257
|
+
token:str=None, # GitHub token (uses GITHUB_TOKEN env var if None)
|
|
162
258
|
**kwargs # Passed to `folder2ctx`
|
|
163
|
-
)->str: # XML for LM context
|
|
259
|
+
)->Union[str,dict]: # XML for LM context, or dict of file sizes
|
|
164
260
|
"Convert GitHub repo to XML context without cloning"
|
|
165
261
|
import tempfile, tarfile, io
|
|
166
|
-
|
|
262
|
+
if owner.startswith('http'):
|
|
263
|
+
parsed = parse_gh_url(owner)
|
|
264
|
+
if not parsed: raise ValueError(f"Invalid GitHub URL: {owner}")
|
|
265
|
+
owner,repo = parsed['owner'], parsed['repo']
|
|
266
|
+
ref = ref or parsed.get('ref')
|
|
267
|
+
folder = folder or parsed.get('path')
|
|
268
|
+
if repo is None: owner, repo = owner.split('/')
|
|
269
|
+
api = GhApi(token=token)
|
|
167
270
|
if ref is None: ref = api.repos.get(owner, repo).default_branch
|
|
168
271
|
data = api.repos.download_tarball_archive(owner, repo, ref)
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
272
|
+
title = f"GitHub repository contents from {owner}/{repo}/{ref}"
|
|
273
|
+
if folder: title += f'/{folder}'
|
|
274
|
+
if show_filters:
|
|
275
|
+
parts = [f"{k}: {', '.join(v) if isinstance(v, (list,tuple)) else v}" for k,v in kwargs.items() if v]
|
|
276
|
+
if parts: title += f" (filters applied -- {' | '.join(parts)})"
|
|
173
277
|
tf = tarfile.open(fileobj=io.BytesIO(data))
|
|
174
278
|
with tempfile.TemporaryDirectory() as tmp:
|
|
175
279
|
tf.extractall(tmp, filter='data')
|
|
176
280
|
subdir = Path(tmp) / tf.getmembers()[0].name.split('/')[0]
|
|
177
|
-
|
|
281
|
+
if folder: subdir = subdir/folder
|
|
282
|
+
return folder2ctx(subdir, include_base=False, title=title, readme_first=True, **kwargs)
|
|
178
283
|
|
|
179
284
|
# %% ../00_xml.ipynb
|
|
180
285
|
@call_parse
|
|
181
|
-
@delegates(folder2ctx)
|
|
182
|
-
def folder2ctx_cli(
|
|
183
|
-
|
|
286
|
+
@delegates(repo2ctx, but='include_base,title,readme_first')
|
|
287
|
+
def repo2ctx_cli(
|
|
288
|
+
owner:str, # GitHub repo owner or "owner/repo" or a full github URL
|
|
289
|
+
repo:str=None, # GitHub repo name (leave empty if using "owner/repo" or URL format)
|
|
290
|
+
ref:str=None, # Git ref (branch/tag/sha)
|
|
291
|
+
folder:str=None, # Only include files under this path
|
|
184
292
|
out:bool=True, # Include notebook cell outputs?
|
|
185
|
-
**kwargs # Passed to `folder2ctx`
|
|
293
|
+
**kwargs # Passed to `repo2ctx`
|
|
186
294
|
)->str: # XML for Claude context
|
|
187
|
-
"CLI to convert
|
|
188
|
-
print(
|
|
295
|
+
"CLI to convert GitHub repo contents to XML context"
|
|
296
|
+
print(repo2ctx(owner, repo, ref=ref, folder=folder, out=out, **kwargs))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: toolslm
|
|
3
|
-
Version: 0.3.9
|
|
3
|
+
Version: 0.3.23
|
|
4
4
|
Summary: Tools to make language models a bit easier to use
|
|
5
5
|
Home-page: https://github.com/AnswerDotAI/toolslm
|
|
6
6
|
Author: Jeremy Howard
|
|
@@ -16,8 +16,9 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
16
16
|
Requires-Python: >=3.9
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
|
-
Requires-Dist: fastcore>=1.9.
|
|
19
|
+
Requires-Dist: fastcore>=1.9.7
|
|
20
20
|
Requires-Dist: httpx
|
|
21
|
+
Requires-Dist: ghapi
|
|
21
22
|
Provides-Extra: dev
|
|
22
23
|
Dynamic: author
|
|
23
24
|
Dynamic: author-email
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
toolslm/__init__.py,sha256=YEBEORM81hFUTm-XaZztBUzkIBn7JXmZ6MQPnR7XWdE,23
|
|
2
|
+
toolslm/_modidx.py,sha256=Gf9CRS_oX2eqJeO_9MedzlVEtUSZEeVnMQJZuWcTWw4,6200
|
|
3
|
+
toolslm/download.py,sha256=yMhyY3u26XRr6a4eZuCCmkprS7LQhHASl01Zn2B4q_o,4481
|
|
4
|
+
toolslm/funccall.py,sha256=Xoulo5xmYUpuqxm6ssvpABeGs8Hx3nyDah-FI9HlxlY,11400
|
|
5
|
+
toolslm/md_hier.py,sha256=r_NPezhgfxjRmSYFlu_ND42hXt1qSbaPWHTcjbviOn4,11010
|
|
6
|
+
toolslm/shell.py,sha256=dGInuRKvexu21VmtZkw_0S3BGiTsbAongUG-yG4YHpc,1566
|
|
7
|
+
toolslm/xml.py,sha256=WLKnM_c7If6rG7MarFpYz9_0nsyaXSP8FUVfPv1sC7I,13487
|
|
8
|
+
toolslm-0.3.23.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
9
|
+
toolslm-0.3.23.dist-info/METADATA,sha256=n-qbMh7K29vHc50KxHqZxho4iWRRHFvRFKHgk4dW3XU,2425
|
|
10
|
+
toolslm-0.3.23.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
+
toolslm-0.3.23.dist-info/entry_points.txt,sha256=L77QoeUC_BjrE3BVY-Wpi4RMq_iwfX_1eAWchc6Zsmw,131
|
|
12
|
+
toolslm-0.3.23.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
|
|
13
|
+
toolslm-0.3.23.dist-info/RECORD,,
|
toolslm-0.3.9.dist-info/RECORD
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
toolslm/__init__.py,sha256=xmkmdvq15kb61xdtCoa1YARnvHBnUgI-0GWIJYvHNeA,22
|
|
2
|
-
toolslm/_modidx.py,sha256=kpgsDpj-Tvn90wezrHaMttyzhNcyNVgw_dQgK10qotI,5308
|
|
3
|
-
toolslm/download.py,sha256=g3BxUSxylC_575M7RFSJ1GI3Co3EwPDdEeWzxaf2Czk,4451
|
|
4
|
-
toolslm/funccall.py,sha256=0OBrx6KzI0KK13L-5Hn69yah9oZhgTsKchmMenCoT0A,10421
|
|
5
|
-
toolslm/md_hier.py,sha256=r_NPezhgfxjRmSYFlu_ND42hXt1qSbaPWHTcjbviOn4,11010
|
|
6
|
-
toolslm/shell.py,sha256=dGInuRKvexu21VmtZkw_0S3BGiTsbAongUG-yG4YHpc,1566
|
|
7
|
-
toolslm/xml.py,sha256=tAHoqXrTRiX8i3pR-9KpHoBb8QXJ_TKEVyTEOPviudE,8095
|
|
8
|
-
toolslm-0.3.9.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
9
|
-
toolslm-0.3.9.dist-info/METADATA,sha256=djSwIqYu8Taj8g0yyXKw3IqFr_fbAKhbI3aQu14kv9U,2403
|
|
10
|
-
toolslm-0.3.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
-
toolslm-0.3.9.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
|
|
12
|
-
toolslm-0.3.9.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
|
|
13
|
-
toolslm-0.3.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|