toolslm 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toolslm/__init__.py +1 -1
- toolslm/_modidx.py +1 -0
- toolslm/download.py +1 -0
- toolslm/xml.py +57 -31
- {toolslm-0.3.11.dist-info → toolslm-0.3.13.dist-info}/METADATA +2 -2
- toolslm-0.3.13.dist-info/RECORD +13 -0
- toolslm-0.3.11.dist-info/RECORD +0 -13
- {toolslm-0.3.11.dist-info → toolslm-0.3.13.dist-info}/WHEEL +0 -0
- {toolslm-0.3.11.dist-info → toolslm-0.3.13.dist-info}/entry_points.txt +0 -0
- {toolslm-0.3.11.dist-info → toolslm-0.3.13.dist-info}/licenses/LICENSE +0 -0
- {toolslm-0.3.11.dist-info → toolslm-0.3.13.dist-info}/top_level.txt +0 -0
toolslm/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.3.11"
|
|
1
|
+
__version__ = "0.3.13"
|
toolslm/_modidx.py
CHANGED
|
@@ -49,5 +49,6 @@ d = { 'settings': { 'branch': 'main',
|
|
|
49
49
|
'toolslm.xml.mk_doc': ('xml.html#mk_doc', 'toolslm/xml.py'),
|
|
50
50
|
'toolslm.xml.mk_doctype': ('xml.html#mk_doctype', 'toolslm/xml.py'),
|
|
51
51
|
'toolslm.xml.nb2xml': ('xml.html#nb2xml', 'toolslm/xml.py'),
|
|
52
|
+
'toolslm.xml.parse_gh_url': ('xml.html#parse_gh_url', 'toolslm/xml.py'),
|
|
52
53
|
'toolslm.xml.read_file': ('xml.html#read_file', 'toolslm/xml.py'),
|
|
53
54
|
'toolslm.xml.repo2ctx': ('xml.html#repo2ctx', 'toolslm/xml.py')}}}
|
toolslm/download.py
CHANGED
toolslm/xml.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
# %% auto 0
|
|
4
4
|
__all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', 'nb2xml', 'mk_doctype', 'mk_doc', 'docs_xml',
|
|
5
|
-
'read_file', 'files2ctx', 'folder2ctx', 'repo2ctx', 'folder2ctx_cli']
|
|
5
|
+
'read_file', 'files2ctx', 'folder2ctx', 'folder2ctx_cli', 'parse_gh_url', 'repo2ctx']
|
|
6
6
|
|
|
7
7
|
# %% ../00_xml.ipynb
|
|
8
8
|
import hashlib,xml.etree.ElementTree as ET
|
|
@@ -101,7 +101,7 @@ def mk_doc(index:int, # The document index
|
|
|
101
101
|
# %% ../00_xml.ipynb
|
|
102
102
|
def docs_xml(docs:list[str], # The content of each document
|
|
103
103
|
srcs:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided
|
|
104
|
-
prefix:bool=True, # Include Anthropic's suggested prose intro?
|
|
104
|
+
prefix:bool=False, # Include Anthropic's suggested prose intro?
|
|
105
105
|
details:Optional[list]=None, # Optional list of dicts with additional attrs for each doc
|
|
106
106
|
title:str=None # Optional title attr for Documents element
|
|
107
107
|
)->str:
|
|
@@ -123,66 +123,92 @@ def read_file(fname, out=True, max_size=None):
|
|
|
123
123
|
return res
|
|
124
124
|
|
|
125
125
|
# %% ../00_xml.ipynb
|
|
126
|
+
@delegates(docs_xml)
|
|
126
127
|
def files2ctx(
|
|
127
128
|
fnames:list[Union[str,Path]], # List of file names to add to context
|
|
128
|
-
prefix:bool=True, # Include Anthropic's suggested prose intro?
|
|
129
129
|
out:bool=True, # Include notebook cell outputs?
|
|
130
130
|
srcs:Optional[list]=None, # Use the labels instead of `fnames`
|
|
131
|
-
|
|
132
|
-
|
|
131
|
+
max_size:int=None, # Skip files larger than this (bytes)
|
|
132
|
+
**kwargs
|
|
133
133
|
)->str: # XML for LM context
|
|
134
134
|
"Convert files to XML context, handling notebooks"
|
|
135
135
|
fnames = [Path(o) for o in fnames]
|
|
136
136
|
contents = [read_file(o, out=out, max_size=max_size) for o in fnames]
|
|
137
|
-
return docs_xml(contents, srcs or fnames,
|
|
137
|
+
return docs_xml(contents, srcs or fnames, **kwargs)
|
|
138
138
|
|
|
139
139
|
# %% ../00_xml.ipynb
|
|
140
140
|
@delegates(globtastic)
|
|
141
141
|
def folder2ctx(
|
|
142
|
-
folder:Union[str,Path],
|
|
143
|
-
prefix:bool=True, # Include Anthropic's suggested prose intro?
|
|
142
|
+
folder:Union[str,Path], # Folder to read
|
|
143
|
+
prefix:bool=False, # Include Anthropic's suggested prose intro?
|
|
144
144
|
out:bool=True, # Include notebook cell outputs?
|
|
145
145
|
include_base:bool=True, # Include full path in src?
|
|
146
146
|
title:str=None, # Optional title attr for Documents element
|
|
147
147
|
max_size:int=100_000, # Skip files larger than this (bytes)
|
|
148
|
+
max_total:int=10_000_000, # Max total output size in bytes
|
|
149
|
+
readme_first:bool=False, # Prioritize README files at start of context?
|
|
150
|
+
files_only:bool=False, # Return dict of {filename: size} instead of context?
|
|
148
151
|
**kwargs
|
|
149
|
-
)->str:
|
|
152
|
+
)->Union[str,dict]:
|
|
150
153
|
"Convert folder contents to XML context, handling notebooks"
|
|
151
154
|
folder = Path(folder)
|
|
152
155
|
fnames = globtastic(folder, **kwargs)
|
|
156
|
+
if files_only: return {str(Path(f).relative_to(folder)): Path(f).stat().st_size for f in fnames}
|
|
157
|
+
if readme_first: fnames = sorted(fnames, key=lambda f: (0 if 'readme' in Path(f).name.lower() else 1, f))
|
|
153
158
|
srcs = fnames if include_base else [Path(f).relative_to(folder) for f in fnames]
|
|
154
|
-
|
|
159
|
+
res = files2ctx(fnames, prefix=prefix, out=out, srcs=srcs, title=title, max_size=max_size)
|
|
160
|
+
suf = f"\n\n[TRUNCATED: output size {{_outsz_}} exceeded max size {max_total} bytes]"
|
|
161
|
+
if max_total and len(res) > max_total: res = truncstr(res, max_total, suf=suf, sizevar='_outsz_')
|
|
162
|
+
return res
|
|
163
|
+
|
|
164
|
+
# %% ../00_xml.ipynb
|
|
165
|
+
@call_parse
|
|
166
|
+
@delegates(folder2ctx)
|
|
167
|
+
def folder2ctx_cli(
|
|
168
|
+
folder:str, # Folder name containing files to add to context
|
|
169
|
+
out:bool=True, # Include notebook cell outputs?
|
|
170
|
+
**kwargs # Passed to `folder2ctx`
|
|
171
|
+
)->str: # XML for Claude context
|
|
172
|
+
"CLI to convert folder contents to XML context, handling notebooks"
|
|
173
|
+
print(folder2ctx(folder, out=out, **kwargs))
|
|
174
|
+
|
|
175
|
+
# %% ../00_xml.ipynb
|
|
176
|
+
def parse_gh_url(url):
|
|
177
|
+
"Parse GitHub URL into (owner, repo, type, ref, path) or None"
|
|
178
|
+
m = re.match(r'https?://(?:www\.)?github\.com/([^/]+)/([^/]+)(?:/([^/]+)(?:/([^/]+)(?:/(.+))?)?)?', url)
|
|
179
|
+
return dict(zip('owner repo typ ref path'.split(), m.groups())) if m else None
|
|
155
180
|
|
|
156
181
|
# %% ../00_xml.ipynb
|
|
157
182
|
@delegates(folder2ctx)
|
|
158
183
|
def repo2ctx(
|
|
159
|
-
owner:str, # GitHub repo owner
|
|
160
|
-
repo:str, # GitHub repo name
|
|
161
|
-
ref:str=None, # Git ref (branch/tag/sha); defaults to repo's default branch
|
|
184
|
+
owner:str, # GitHub repo owner or "owner/repo" or a full github URL
|
|
185
|
+
repo:str=None, # GitHub repo name (leave empty if using "owner/repo" or URL format for owner param)
|
|
186
|
+
ref:str=None, # Git ref (branch/tag/sha) (get from URL not provided); defaults to repo's default branch
|
|
187
|
+
folder:str=None, # Only include files under this path (get from URL not provided)
|
|
188
|
+
show_filters:bool=True, # Include filter info in title?
|
|
189
|
+
token:str=None, # GitHub token (uses GITHUB_TOKEN env var if None)
|
|
162
190
|
**kwargs # Passed to `folder2ctx`
|
|
163
|
-
)->str: # XML for LM context
|
|
191
|
+
)->Union[str,dict]: # XML for LM context, or dict of file sizes
|
|
164
192
|
"Convert GitHub repo to XML context without cloning"
|
|
165
193
|
import tempfile, tarfile, io
|
|
166
|
-
|
|
194
|
+
if owner.startswith('http'):
|
|
195
|
+
parsed = parse_gh_url(owner)
|
|
196
|
+
if not parsed: raise ValueError(f"Invalid GitHub URL: {owner}")
|
|
197
|
+
owner,repo = parsed['owner'], parsed['repo']
|
|
198
|
+
ref = ref or parsed.get('ref')
|
|
199
|
+
folder = folder or parsed.get('path')
|
|
200
|
+
if repo is None: owner, repo = owner.split('/')
|
|
201
|
+
api = GhApi(token=token)
|
|
167
202
|
if ref is None: ref = api.repos.get(owner, repo).default_branch
|
|
168
203
|
data = api.repos.download_tarball_archive(owner, repo, ref)
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
204
|
+
title = f"GitHub repository contents from {owner}/{repo}/{ref}"
|
|
205
|
+
if folder: title += f'/{folder}'
|
|
206
|
+
if show_filters:
|
|
207
|
+
parts = [f"{k}: {', '.join(v) if isinstance(v, (list,tuple)) else v}" for k,v in kwargs.items() if v]
|
|
208
|
+
if parts: title += f" (filters applied -- {' | '.join(parts)})"
|
|
173
209
|
tf = tarfile.open(fileobj=io.BytesIO(data))
|
|
174
210
|
with tempfile.TemporaryDirectory() as tmp:
|
|
175
211
|
tf.extractall(tmp, filter='data')
|
|
176
212
|
subdir = Path(tmp) / tf.getmembers()[0].name.split('/')[0]
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
# %% ../00_xml.ipynb
|
|
180
|
-
@call_parse
|
|
181
|
-
@delegates(folder2ctx)
|
|
182
|
-
def folder2ctx_cli(
|
|
183
|
-
folder:str, # Folder name containing files to add to context
|
|
184
|
-
out:bool=True, # Include notebook cell outputs?
|
|
185
|
-
**kwargs # Passed to `folder2ctx`
|
|
186
|
-
)->str: # XML for Claude context
|
|
187
|
-
"CLI to convert folder contents to XML context, handling notebooks"
|
|
188
|
-
print(folder2ctx(folder, out=out, **kwargs))
|
|
213
|
+
if folder: subdir = subdir/folder
|
|
214
|
+
return folder2ctx(subdir, include_base=False, title=title, readme_first=True, **kwargs)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: toolslm
|
|
3
|
-
Version: 0.3.11
|
|
3
|
+
Version: 0.3.13
|
|
4
4
|
Summary: Tools to make language models a bit easier to use
|
|
5
5
|
Home-page: https://github.com/AnswerDotAI/toolslm
|
|
6
6
|
Author: Jeremy Howard
|
|
@@ -16,7 +16,7 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
16
16
|
Requires-Python: >=3.9
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
|
-
Requires-Dist: fastcore>=1.9.
|
|
19
|
+
Requires-Dist: fastcore>=1.9.7
|
|
20
20
|
Requires-Dist: httpx
|
|
21
21
|
Provides-Extra: dev
|
|
22
22
|
Dynamic: author
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
toolslm/__init__.py,sha256=jXmhGysidmiPOxLAzcyzQjjT98GxaQk2jHMECTsHr04,23
|
|
2
|
+
toolslm/_modidx.py,sha256=DMBoEHx7c0EvxmSiLYbi0Grd8hjSQKiuK4xJdfa1rFo,5410
|
|
3
|
+
toolslm/download.py,sha256=yMhyY3u26XRr6a4eZuCCmkprS7LQhHASl01Zn2B4q_o,4481
|
|
4
|
+
toolslm/funccall.py,sha256=_5TyhTjWaWLi-eJ96-4P3_faFv6Ft07nO60UjCF-bPU,11160
|
|
5
|
+
toolslm/md_hier.py,sha256=r_NPezhgfxjRmSYFlu_ND42hXt1qSbaPWHTcjbviOn4,11010
|
|
6
|
+
toolslm/shell.py,sha256=dGInuRKvexu21VmtZkw_0S3BGiTsbAongUG-yG4YHpc,1566
|
|
7
|
+
toolslm/xml.py,sha256=5sT-rRwpYTNpxNnZPr0QqIS2uZAAjsR3aE3c90nXzWA,9820
|
|
8
|
+
toolslm-0.3.13.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
9
|
+
toolslm-0.3.13.dist-info/METADATA,sha256=uWzDigLPr9FoLJUdC3oXk8KfioKRfm4wTJi8k9K52iM,2404
|
|
10
|
+
toolslm-0.3.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
+
toolslm-0.3.13.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
|
|
12
|
+
toolslm-0.3.13.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
|
|
13
|
+
toolslm-0.3.13.dist-info/RECORD,,
|
toolslm-0.3.11.dist-info/RECORD
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
toolslm/__init__.py,sha256=TESjMH0a_iUkwdfWT4nyzKizSFmmCY2omxnS2XyT97Y,23
|
|
2
|
-
toolslm/_modidx.py,sha256=kpgsDpj-Tvn90wezrHaMttyzhNcyNVgw_dQgK10qotI,5308
|
|
3
|
-
toolslm/download.py,sha256=g3BxUSxylC_575M7RFSJ1GI3Co3EwPDdEeWzxaf2Czk,4451
|
|
4
|
-
toolslm/funccall.py,sha256=_5TyhTjWaWLi-eJ96-4P3_faFv6Ft07nO60UjCF-bPU,11160
|
|
5
|
-
toolslm/md_hier.py,sha256=r_NPezhgfxjRmSYFlu_ND42hXt1qSbaPWHTcjbviOn4,11010
|
|
6
|
-
toolslm/shell.py,sha256=dGInuRKvexu21VmtZkw_0S3BGiTsbAongUG-yG4YHpc,1566
|
|
7
|
-
toolslm/xml.py,sha256=tAHoqXrTRiX8i3pR-9KpHoBb8QXJ_TKEVyTEOPviudE,8095
|
|
8
|
-
toolslm-0.3.11.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
9
|
-
toolslm-0.3.11.dist-info/METADATA,sha256=LrsRNEumsWCi0JnHLTn73b8GoqjwQ3m6mDak4n2h47g,2404
|
|
10
|
-
toolslm-0.3.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
-
toolslm-0.3.11.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
|
|
12
|
-
toolslm-0.3.11.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
|
|
13
|
-
toolslm-0.3.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|