toolslm 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
toolslm/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.3.11"
1
+ __version__ = "0.3.13"
toolslm/_modidx.py CHANGED
@@ -49,5 +49,6 @@ d = { 'settings': { 'branch': 'main',
49
49
  'toolslm.xml.mk_doc': ('xml.html#mk_doc', 'toolslm/xml.py'),
50
50
  'toolslm.xml.mk_doctype': ('xml.html#mk_doctype', 'toolslm/xml.py'),
51
51
  'toolslm.xml.nb2xml': ('xml.html#nb2xml', 'toolslm/xml.py'),
52
+ 'toolslm.xml.parse_gh_url': ('xml.html#parse_gh_url', 'toolslm/xml.py'),
52
53
  'toolslm.xml.read_file': ('xml.html#read_file', 'toolslm/xml.py'),
53
54
  'toolslm.xml.repo2ctx': ('xml.html#repo2ctx', 'toolslm/xml.py')}}}
toolslm/download.py CHANGED
@@ -8,6 +8,7 @@ from fastcore.utils import *
8
8
  from httpx import get
9
9
  from fastcore.meta import delegates
10
10
  from urllib.parse import urlparse, urljoin
11
+ from .xml import parse_gh_url
11
12
 
12
13
  # %% ../03_download.ipynb
13
14
  def clean_md(text, rm_comments=True, rm_details=True):
toolslm/xml.py CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  # %% auto 0
4
4
  __all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', 'nb2xml', 'mk_doctype', 'mk_doc', 'docs_xml',
5
- 'read_file', 'files2ctx', 'folder2ctx', 'repo2ctx', 'folder2ctx_cli']
5
+ 'read_file', 'files2ctx', 'folder2ctx', 'folder2ctx_cli', 'parse_gh_url', 'repo2ctx']
6
6
 
7
7
  # %% ../00_xml.ipynb
8
8
  import hashlib,xml.etree.ElementTree as ET
@@ -101,7 +101,7 @@ def mk_doc(index:int, # The document index
101
101
  # %% ../00_xml.ipynb
102
102
  def docs_xml(docs:list[str], # The content of each document
103
103
  srcs:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided
104
- prefix:bool=True, # Include Anthropic's suggested prose intro?
104
+ prefix:bool=False, # Include Anthropic's suggested prose intro?
105
105
  details:Optional[list]=None, # Optional list of dicts with additional attrs for each doc
106
106
  title:str=None # Optional title attr for Documents element
107
107
  )->str:
@@ -123,66 +123,92 @@ def read_file(fname, out=True, max_size=None):
123
123
  return res
124
124
 
125
125
  # %% ../00_xml.ipynb
126
+ @delegates(docs_xml)
126
127
  def files2ctx(
127
128
  fnames:list[Union[str,Path]], # List of file names to add to context
128
- prefix:bool=True, # Include Anthropic's suggested prose intro?
129
129
  out:bool=True, # Include notebook cell outputs?
130
130
  srcs:Optional[list]=None, # Use the labels instead of `fnames`
131
- title:str=None, # Optional title attr for Documents element
132
- max_size:int=None # Skip files larger than this (bytes)
131
+ max_size:int=None, # Skip files larger than this (bytes)
132
+ **kwargs
133
133
  )->str: # XML for LM context
134
134
  "Convert files to XML context, handling notebooks"
135
135
  fnames = [Path(o) for o in fnames]
136
136
  contents = [read_file(o, out=out, max_size=max_size) for o in fnames]
137
- return docs_xml(contents, srcs or fnames, prefix=prefix, title=title)
137
+ return docs_xml(contents, srcs or fnames, **kwargs)
138
138
 
139
139
  # %% ../00_xml.ipynb
140
140
  @delegates(globtastic)
141
141
  def folder2ctx(
142
- folder:Union[str,Path],
143
- prefix:bool=True, # Include Anthropic's suggested prose intro?
142
+ folder:Union[str,Path], # Folder to read
143
+ prefix:bool=False, # Include Anthropic's suggested prose intro?
144
144
  out:bool=True, # Include notebook cell outputs?
145
145
  include_base:bool=True, # Include full path in src?
146
146
  title:str=None, # Optional title attr for Documents element
147
147
  max_size:int=100_000, # Skip files larger than this (bytes)
148
+ max_total:int=10_000_000, # Max total output size in bytes
149
+ readme_first:bool=False, # Prioritize README files at start of context?
150
+ files_only:bool=False, # Return dict of {filename: size} instead of context?
148
151
  **kwargs
149
- )->str:
152
+ )->Union[str,dict]:
150
153
  "Convert folder contents to XML context, handling notebooks"
151
154
  folder = Path(folder)
152
155
  fnames = globtastic(folder, **kwargs)
156
+ if files_only: return {str(Path(f).relative_to(folder)): Path(f).stat().st_size for f in fnames}
157
+ if readme_first: fnames = sorted(fnames, key=lambda f: (0 if 'readme' in Path(f).name.lower() else 1, f))
153
158
  srcs = fnames if include_base else [Path(f).relative_to(folder) for f in fnames]
154
- return files2ctx(fnames, prefix=prefix, out=out, srcs=srcs, title=title, max_size=max_size)
159
+ res = files2ctx(fnames, prefix=prefix, out=out, srcs=srcs, title=title, max_size=max_size)
160
+ suf = f"\n\n[TRUNCATED: output size {{_outsz_}} exceeded max size {max_total} bytes]"
161
+ if max_total and len(res) > max_total: res = truncstr(res, max_total, suf=suf, sizevar='_outsz_')
162
+ return res
163
+
164
+ # %% ../00_xml.ipynb
165
+ @call_parse
166
+ @delegates(folder2ctx)
167
+ def folder2ctx_cli(
168
+ folder:str, # Folder name containing files to add to context
169
+ out:bool=True, # Include notebook cell outputs?
170
+ **kwargs # Passed to `folder2ctx`
171
+ )->str: # XML for Claude context
172
+ "CLI to convert folder contents to XML context, handling notebooks"
173
+ print(folder2ctx(folder, out=out, **kwargs))
174
+
175
+ # %% ../00_xml.ipynb
176
+ def parse_gh_url(url):
177
+ "Parse GitHub URL into (owner, repo, type, ref, path) or None"
178
+ m = re.match(r'https?://(?:www\.)?github\.com/([^/]+)/([^/]+)(?:/([^/]+)(?:/([^/]+)(?:/(.+))?)?)?', url)
179
+ return dict(zip('owner repo typ ref path'.split(), m.groups())) if m else None
155
180
 
156
181
  # %% ../00_xml.ipynb
157
182
  @delegates(folder2ctx)
158
183
  def repo2ctx(
159
- owner:str, # GitHub repo owner
160
- repo:str, # GitHub repo name
161
- ref:str=None, # Git ref (branch/tag/sha); defaults to repo's default branch
184
+ owner:str, # GitHub repo owner or "owner/repo" or a full GitHub URL
185
+ repo:str=None, # GitHub repo name (leave empty if using "owner/repo" or URL format for owner param)
186
+ ref:str=None, # Git ref (branch/tag/sha) (taken from the URL if not provided); defaults to repo's default branch
187
+ folder:str=None, # Only include files under this path (taken from the URL if not provided)
188
+ show_filters:bool=True, # Include filter info in title?
189
+ token:str=None, # GitHub token (uses GITHUB_TOKEN env var if None)
162
190
  **kwargs # Passed to `folder2ctx`
163
- )->str: # XML for LM context
191
+ )->Union[str,dict]: # XML for LM context, or dict of file sizes
164
192
  "Convert GitHub repo to XML context without cloning"
165
193
  import tempfile, tarfile, io
166
- api = GhApi()
194
+ if owner.startswith('http'):
195
+ parsed = parse_gh_url(owner)
196
+ if not parsed: raise ValueError(f"Invalid GitHub URL: {owner}")
197
+ owner,repo = parsed['owner'], parsed['repo']
198
+ ref = ref or parsed.get('ref')
199
+ folder = folder or parsed.get('path')
200
+ if repo is None: owner, repo = owner.split('/')
201
+ api = GhApi(token=token)
167
202
  if ref is None: ref = api.repos.get(owner, repo).default_branch
168
203
  data = api.repos.download_tarball_archive(owner, repo, ref)
169
- parts = ' | '.join(f"{k}: {', '.join(v) if isinstance(v, (list,tuple)) else v}"
170
- for k,v in kwargs.items() if v)
171
- title = f"GitHub repository contents from {owner}/{repo} at ref '{ref}'"
172
- if parts: title += f" (filters applied: {parts})"
204
+ title = f"GitHub repository contents from {owner}/{repo}/{ref}"
205
+ if folder: title += f'/{folder}'
206
+ if show_filters:
207
+ parts = [f"{k}: {', '.join(v) if isinstance(v, (list,tuple)) else v}" for k,v in kwargs.items() if v]
208
+ if parts: title += f" (filters applied -- {' | '.join(parts)})"
173
209
  tf = tarfile.open(fileobj=io.BytesIO(data))
174
210
  with tempfile.TemporaryDirectory() as tmp:
175
211
  tf.extractall(tmp, filter='data')
176
212
  subdir = Path(tmp) / tf.getmembers()[0].name.split('/')[0]
177
- return folder2ctx(subdir, include_base=False, title=title, **kwargs)
178
-
179
- # %% ../00_xml.ipynb
180
- @call_parse
181
- @delegates(folder2ctx)
182
- def folder2ctx_cli(
183
- folder:str, # Folder name containing files to add to context
184
- out:bool=True, # Include notebook cell outputs?
185
- **kwargs # Passed to `folder2ctx`
186
- )->str: # XML for Claude context
187
- "CLI to convert folder contents to XML context, handling notebooks"
188
- print(folder2ctx(folder, out=out, **kwargs))
213
+ if folder: subdir = subdir/folder
214
+ return folder2ctx(subdir, include_base=False, title=title, readme_first=True, **kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: toolslm
3
- Version: 0.3.11
3
+ Version: 0.3.13
4
4
  Summary: Tools to make language models a bit easier to use
5
5
  Home-page: https://github.com/AnswerDotAI/toolslm
6
6
  Author: Jeremy Howard
@@ -16,7 +16,7 @@ Classifier: License :: OSI Approved :: Apache Software License
16
16
  Requires-Python: >=3.9
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
- Requires-Dist: fastcore>=1.9.6
19
+ Requires-Dist: fastcore>=1.9.7
20
20
  Requires-Dist: httpx
21
21
  Provides-Extra: dev
22
22
  Dynamic: author
@@ -0,0 +1,13 @@
1
+ toolslm/__init__.py,sha256=jXmhGysidmiPOxLAzcyzQjjT98GxaQk2jHMECTsHr04,23
2
+ toolslm/_modidx.py,sha256=DMBoEHx7c0EvxmSiLYbi0Grd8hjSQKiuK4xJdfa1rFo,5410
3
+ toolslm/download.py,sha256=yMhyY3u26XRr6a4eZuCCmkprS7LQhHASl01Zn2B4q_o,4481
4
+ toolslm/funccall.py,sha256=_5TyhTjWaWLi-eJ96-4P3_faFv6Ft07nO60UjCF-bPU,11160
5
+ toolslm/md_hier.py,sha256=r_NPezhgfxjRmSYFlu_ND42hXt1qSbaPWHTcjbviOn4,11010
6
+ toolslm/shell.py,sha256=dGInuRKvexu21VmtZkw_0S3BGiTsbAongUG-yG4YHpc,1566
7
+ toolslm/xml.py,sha256=5sT-rRwpYTNpxNnZPr0QqIS2uZAAjsR3aE3c90nXzWA,9820
8
+ toolslm-0.3.13.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
9
+ toolslm-0.3.13.dist-info/METADATA,sha256=uWzDigLPr9FoLJUdC3oXk8KfioKRfm4wTJi8k9K52iM,2404
10
+ toolslm-0.3.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
+ toolslm-0.3.13.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
12
+ toolslm-0.3.13.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
13
+ toolslm-0.3.13.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- toolslm/__init__.py,sha256=TESjMH0a_iUkwdfWT4nyzKizSFmmCY2omxnS2XyT97Y,23
2
- toolslm/_modidx.py,sha256=kpgsDpj-Tvn90wezrHaMttyzhNcyNVgw_dQgK10qotI,5308
3
- toolslm/download.py,sha256=g3BxUSxylC_575M7RFSJ1GI3Co3EwPDdEeWzxaf2Czk,4451
4
- toolslm/funccall.py,sha256=_5TyhTjWaWLi-eJ96-4P3_faFv6Ft07nO60UjCF-bPU,11160
5
- toolslm/md_hier.py,sha256=r_NPezhgfxjRmSYFlu_ND42hXt1qSbaPWHTcjbviOn4,11010
6
- toolslm/shell.py,sha256=dGInuRKvexu21VmtZkw_0S3BGiTsbAongUG-yG4YHpc,1566
7
- toolslm/xml.py,sha256=tAHoqXrTRiX8i3pR-9KpHoBb8QXJ_TKEVyTEOPviudE,8095
8
- toolslm-0.3.11.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
9
- toolslm-0.3.11.dist-info/METADATA,sha256=LrsRNEumsWCi0JnHLTn73b8GoqjwQ3m6mDak4n2h47g,2404
10
- toolslm-0.3.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
- toolslm-0.3.11.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
12
- toolslm-0.3.11.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
13
- toolslm-0.3.11.dist-info/RECORD,,