toolslm 0.3.9__py3-none-any.whl → 0.3.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
toolslm/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.3.9"
1
+ __version__ = "0.3.17"
toolslm/_modidx.py CHANGED
@@ -49,5 +49,10 @@ d = { 'settings': { 'branch': 'main',
49
49
  'toolslm.xml.mk_doc': ('xml.html#mk_doc', 'toolslm/xml.py'),
50
50
  'toolslm.xml.mk_doctype': ('xml.html#mk_doctype', 'toolslm/xml.py'),
51
51
  'toolslm.xml.nb2xml': ('xml.html#nb2xml', 'toolslm/xml.py'),
52
+ 'toolslm.xml.parse_gh_url': ('xml.html#parse_gh_url', 'toolslm/xml.py'),
52
53
  'toolslm.xml.read_file': ('xml.html#read_file', 'toolslm/xml.py'),
53
- 'toolslm.xml.repo2ctx': ('xml.html#repo2ctx', 'toolslm/xml.py')}}}
54
+ 'toolslm.xml.repo2ctx': ('xml.html#repo2ctx', 'toolslm/xml.py'),
55
+ 'toolslm.xml.sym2file': ('xml.html#sym2file', 'toolslm/xml.py'),
56
+ 'toolslm.xml.sym2folderctx': ('xml.html#sym2folderctx', 'toolslm/xml.py'),
57
+ 'toolslm.xml.sym2pkgctx': ('xml.html#sym2pkgctx', 'toolslm/xml.py'),
58
+ 'toolslm.xml.sym2pkgpath': ('xml.html#sym2pkgpath', 'toolslm/xml.py')}}}
toolslm/download.py CHANGED
@@ -8,6 +8,7 @@ from fastcore.utils import *
8
8
  from httpx import get
9
9
  from fastcore.meta import delegates
10
10
  from urllib.parse import urlparse, urljoin
11
+ from .xml import parse_gh_url
11
12
 
12
13
  # %% ../03_download.ipynb
13
14
  def clean_md(text, rm_comments=True, rm_details=True):
toolslm/funccall.py CHANGED
@@ -5,7 +5,7 @@ __all__ = ['empty', 'custom_types', 'get_schema', 'python', 'mk_ns', 'call_func'
5
5
  'mk_tool']
6
6
 
7
7
  # %% ../01_funccall.ipynb
8
- import inspect, json
8
+ import inspect, json, ast
9
9
  from collections import abc
10
10
  from fastcore.utils import *
11
11
  from fastcore.docments import docments
@@ -37,12 +37,20 @@ def _types(t:type)->tuple[str,Optional[str]]:
37
37
  else: return tmap.get(t.__name__, "object"), None
38
38
 
39
39
  # %% ../01_funccall.ipynb
40
- def _param(name, info):
41
- "json schema parameter given `name` and `info` from docments full dict."
40
+ def _param(
41
+ name, # param name
42
+ info, # dict from docments
43
+ evalable=False): # stringify defaults that can't be literal_eval'd?
44
+ "json schema parameter given `name` and `info` from docments full dict"
42
45
  paramt,itemt = _types(info.anno)
43
46
  pschema = dict(type=paramt, description=info.docment or "")
44
47
  if itemt: pschema["items"] = {"type": itemt}
45
- if info.default is not empty: pschema["default"] = info.default
48
+ if info.default is not empty:
49
+ if evalable:
50
+ try: ast.literal_eval(repr(info.default))
51
+ except: pschema["default"] = str(info.default)
52
+ else: pschema["default"] = info.default
53
+ else: pschema["default"] = info.default
46
54
  return pschema
47
55
 
48
56
  # %% ../01_funccall.ipynb
@@ -90,9 +98,9 @@ def _handle_container(origin, args, defs):
90
98
  return None
91
99
 
92
100
  # %% ../01_funccall.ipynb
93
- def _process_property(name, obj, props, req, defs):
101
+ def _process_property(name, obj, props, req, defs, evalable=False):
94
102
  "Process a single property of the schema"
95
- p = _param(name, obj)
103
+ p = _param(name, obj, evalable=evalable)
96
104
  props[name] = p
97
105
  if obj.default is empty: req[name] = True
98
106
 
@@ -103,14 +111,14 @@ def _process_property(name, obj, props, req, defs):
103
111
  p.update(_handle_type(obj.anno, defs))
104
112
 
105
113
  # %% ../01_funccall.ipynb
106
- def _get_nested_schema(obj):
114
+ def _get_nested_schema(obj, evalable=False, skip_hidden=False):
107
115
  "Generate nested JSON schema for a class or function"
108
116
  d = docments(obj, full=True)
109
117
  props, req, defs = {}, {}, {}
110
118
 
111
119
  for n, o in d.items():
112
- if n != 'return' and n != 'self':
113
- _process_property(n, o, props, req, defs)
120
+ if n != 'return' and n != 'self' and not (skip_hidden and n.startswith('_')):
121
+ _process_property(n, o, props, req, defs, evalable=evalable)
114
122
 
115
123
  tkw = {}
116
124
  if isinstance(obj, type): tkw['title']=obj.__name__
@@ -120,10 +128,15 @@ def _get_nested_schema(obj):
120
128
  return schema
121
129
 
122
130
  # %% ../01_funccall.ipynb
123
- def get_schema(f:Union[callable,dict], pname='input_schema')->dict:
131
+ def get_schema(
132
+ f:Union[callable,dict], # Function to get schema for
133
+ pname='input_schema', # Key name for parameters
134
+ evalable=False, # stringify defaults that can't be literal_eval'd?
135
+ skip_hidden=False # skip parameters starting with '_'?
136
+ )->dict: # {'name':..., 'description':..., pname:...}
124
137
  "Generate JSON schema for a class, function, or method"
125
138
  if isinstance(f, dict): return f
126
- schema = _get_nested_schema(f)
139
+ schema = _get_nested_schema(f, evalable=evalable, skip_hidden=skip_hidden)
127
140
  desc = f.__doc__
128
141
  assert desc, "Docstring missing!"
129
142
  d = docments(f, full=True)
toolslm/xml.py CHANGED
@@ -2,10 +2,11 @@
2
2
 
3
3
  # %% auto 0
4
4
  __all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', 'nb2xml', 'mk_doctype', 'mk_doc', 'docs_xml',
5
- 'read_file', 'files2ctx', 'folder2ctx', 'repo2ctx', 'folder2ctx_cli']
5
+ 'read_file', 'files2ctx', 'folder2ctx', 'sym2file', 'sym2folderctx', 'sym2pkgpath', 'sym2pkgctx',
6
+ 'folder2ctx_cli', 'parse_gh_url', 'repo2ctx']
6
7
 
7
8
  # %% ../00_xml.ipynb
8
- import hashlib,xml.etree.ElementTree as ET
9
+ import hashlib, inspect, xml.etree.ElementTree as ET
9
10
  from collections import namedtuple
10
11
  from ghapi.all import GhApi
11
12
 
@@ -49,22 +50,23 @@ def cell2out(o):
49
50
  if hasattr(o, 'ename'): return Out(f"{o.ename}: {o.evalue}", type='error')
50
51
 
51
52
  # %% ../00_xml.ipynb
52
- def cell2xml(cell, out=True):
53
+ def cell2xml(cell, out=True, ids=True):
53
54
  "Convert notebook cell to concise XML format"
54
55
  src = ''.join(getattr(cell, 'source', ''))
55
56
  f = Code if cell.cell_type=='code' else Md
56
- if not out: return f(src)
57
+ kw = dict(id=cell.id) if ids and hasattr(cell, 'id') else {}
58
+ if not out: return f(src, **kw)
57
59
  parts = [Source(src)]
58
60
  out_items = L(getattr(cell,'outputs',[])).map(cell2out).filter()
59
61
  if out_items: parts.append(Outs(*out_items))
60
- return f(*parts)
62
+ return f(*parts, **kw)
61
63
 
62
64
  # %% ../00_xml.ipynb
63
- def nb2xml(fname=None, nb=None, out=True):
65
+ def nb2xml(fname=None, nb=None, out=True, ids=True):
64
66
  "Convert notebook to XML format"
65
67
  assert bool(fname)^bool(nb), "Pass either `fname` or `nb`"
66
68
  if not nb: nb = dict2obj(fname.read_json())
67
- cells_xml = [to_xml(cell2xml(c, out=out), do_escape=False) for c in nb.cells if c.cell_type in ('code','markdown')]
69
+ cells_xml = [to_xml(cell2xml(c, out=out, ids=ids), do_escape=False) for c in nb.cells if c.cell_type in ('code','markdown')]
68
70
  return to_xml(Notebook(*cells_xml), do_escape=False)
69
71
 
70
72
  # %% ../00_xml.ipynb
@@ -101,7 +103,7 @@ def mk_doc(index:int, # The document index
101
103
  # %% ../00_xml.ipynb
102
104
  def docs_xml(docs:list[str], # The content of each document
103
105
  srcs:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided
104
- prefix:bool=True, # Include Anthropic's suggested prose intro?
106
+ prefix:bool=False, # Include Anthropic's suggested prose intro?
105
107
  details:Optional[list]=None, # Optional list of dicts with additional attrs for each doc
106
108
  title:str=None # Optional title attr for Documents element
107
109
  )->str:
@@ -114,67 +116,82 @@ def docs_xml(docs:list[str], # The content of each document
114
116
  return pre + to_xml(Documents(*docs, **kw), do_escape=False)
115
117
 
116
118
  # %% ../00_xml.ipynb
117
- def read_file(fname, out=True, max_size=None):
119
+ def read_file(fname, out=True, max_size=None, ids=True):
118
120
  "Read file content, converting notebooks to XML if needed"
119
121
  fname = Path(fname)
120
- if fname.suffix == '.ipynb': res = nb2xml(fname, out=out)
122
+ if fname.suffix == '.ipynb': res = nb2xml(fname, out=out, ids=ids)
121
123
  else: res = fname.read_text()
122
124
  if max_size and len(res)>max_size: return f"[Skipped: {fname.name} exceeds {max_size} bytes]"
123
125
  return res
124
126
 
125
127
  # %% ../00_xml.ipynb
128
+ @delegates(docs_xml)
126
129
  def files2ctx(
127
130
  fnames:list[Union[str,Path]], # List of file names to add to context
128
- prefix:bool=True, # Include Anthropic's suggested prose intro?
129
131
  out:bool=True, # Include notebook cell outputs?
130
132
  srcs:Optional[list]=None, # Use the labels instead of `fnames`
131
- title:str=None, # Optional title attr for Documents element
132
- max_size:int=None # Skip files larger than this (bytes)
133
+ max_size:int=None, # Skip files larger than this (bytes)
134
+ ids:bool=True, # Include cell ids in notebooks?
135
+ **kwargs
133
136
  )->str: # XML for LM context
134
137
  "Convert files to XML context, handling notebooks"
135
138
  fnames = [Path(o) for o in fnames]
136
- contents = [read_file(o, out=out, max_size=max_size) for o in fnames]
137
- return docs_xml(contents, srcs or fnames, prefix=prefix, title=title)
139
+ contents = [read_file(o, out=out, max_size=max_size, ids=ids) for o in fnames]
140
+ return docs_xml(contents, srcs or fnames, **kwargs)
138
141
 
139
142
  # %% ../00_xml.ipynb
140
143
  @delegates(globtastic)
141
144
  def folder2ctx(
142
- folder:Union[str,Path],
143
- prefix:bool=True, # Include Anthropic's suggested prose intro?
145
+ folder:Union[str,Path], # Folder to read
146
+ prefix:bool=False, # Include Anthropic's suggested prose intro?
144
147
  out:bool=True, # Include notebook cell outputs?
145
148
  include_base:bool=True, # Include full path in src?
146
149
  title:str=None, # Optional title attr for Documents element
147
150
  max_size:int=100_000, # Skip files larger than this (bytes)
151
+ max_total:int=10_000_000, # Max total output size in bytes
152
+ readme_first:bool=False, # Prioritize README files at start of context?
153
+ files_only:bool=False, # Return dict of {filename: size} instead of context?
154
+ ids:bool=True, # Include cell ids in notebooks?
148
155
  **kwargs
149
- )->str:
156
+ )->Union[str,dict]:
150
157
  "Convert folder contents to XML context, handling notebooks"
151
158
  folder = Path(folder)
152
- fnames = globtastic(folder, **kwargs)
153
- srcs = fnames if include_base else [Path(f).relative_to(folder) for f in fnames]
154
- return files2ctx(fnames, prefix=prefix, out=out, srcs=srcs, title=title, max_size=max_size)
159
+ fnames = pglob(folder, **kwargs)
160
+ if files_only: return {str(f.relative_to(folder)): f.stat().st_size for f in fnames}
161
+ if readme_first: fnames = sorted(fnames, key=lambda f: (0 if 'readme' in f.name.lower() else 1, f))
162
+ srcs = fnames if include_base else [f.relative_to(folder) for f in fnames]
163
+ res = files2ctx(fnames, prefix=prefix, out=out, srcs=srcs, title=title, max_size=max_size, ids=ids)
164
+ suf = f"\n\n[TRUNCATED: output size {{_outsz_}} exceeded max size {max_total} bytes]"
165
+ if max_total and len(res) > max_total: res = truncstr(res, max_total, suf=suf, sizevar='_outsz_')
166
+ return res
167
+
168
+ # %% ../00_xml.ipynb
169
+ def sym2file(sym):
170
+ "Return md string with filepath and contents for a symbol's source file"
171
+ f = Path(inspect.getfile(sym))
172
+ return f"- `{f}`\n\n````\n{f.read_text()}\n````"
155
173
 
156
174
  # %% ../00_xml.ipynb
157
175
  @delegates(folder2ctx)
158
- def repo2ctx(
159
- owner:str, # GitHub repo owner
160
- repo:str, # GitHub repo name
161
- ref:str=None, # Git ref (branch/tag/sha); defaults to repo's default branch
162
- **kwargs # Passed to `folder2ctx`
163
- )->str: # XML for LM context
164
- "Convert GitHub repo to XML context without cloning"
165
- import tempfile, tarfile, io
166
- api = GhApi()
167
- if ref is None: ref = api.repos.get(owner, repo).default_branch
168
- data = api.repos.download_tarball_archive(owner, repo, ref)
169
- parts = ' | '.join(f"{k}: {', '.join(v) if isinstance(v, (list,tuple)) else v}"
170
- for k,v in kwargs.items() if v)
171
- title = f"GitHub repository contents from {owner}/{repo} at ref '{ref}'"
172
- if parts: title += f" (filters applied: {parts})"
173
- tf = tarfile.open(fileobj=io.BytesIO(data))
174
- with tempfile.TemporaryDirectory() as tmp:
175
- tf.extractall(tmp, filter='data')
176
- subdir = Path(tmp) / tf.getmembers()[0].name.split('/')[0]
177
- return folder2ctx(subdir, include_base=False, title=title, **kwargs)
176
+ def sym2folderctx(
177
+ sym,
178
+ types:str|list='py', # list or comma-separated str of ext types from: py, js, java, c, cpp, rb, r, ex, sh, web, doc, cfg
179
+ skip_file_re=r'^_mod',
180
+ **kwargs):
181
+ "Return folder context for a symbol's source file location"
182
+ return folder2ctx(Path(inspect.getfile(sym)).parent, types=types, skip_file_re=skip_file_re, **kwargs)
183
+
184
+ # %% ../00_xml.ipynb
185
+ def sym2pkgpath(sym):
186
+ "Get root package path for a symbol"
187
+ root = sym.__module__.split('.')[0]
188
+ return Path(sys.modules[root].__path__[0])
189
+
190
+ # %% ../00_xml.ipynb
191
+ @delegates(folder2ctx)
192
+ def sym2pkgctx(sym, types:str|list='py', skip_file_re=r'^_mod', **kwargs):
193
+ "Return repo context for a symbol's root package"
194
+ return folder2ctx(sym2pkgpath(sym), types=types, skip_file_re=skip_file_re, **kwargs)
178
195
 
179
196
  # %% ../00_xml.ipynb
180
197
  @call_parse
@@ -186,3 +203,44 @@ def folder2ctx_cli(
186
203
  )->str: # XML for Claude context
187
204
  "CLI to convert folder contents to XML context, handling notebooks"
188
205
  print(folder2ctx(folder, out=out, **kwargs))
206
+
207
+ # %% ../00_xml.ipynb
208
+ def parse_gh_url(url):
209
+ "Parse GitHub URL into (owner, repo, type, ref, path) or None"
210
+ m = re.match(r'https?://(?:www\.)?github\.com/([^/]+)/([^/]+)(?:/([^/]+)(?:/([^/]+)(?:/(.+))?)?)?', url)
211
+ return dict(zip('owner repo typ ref path'.split(), m.groups())) if m else None
212
+
213
+ # %% ../00_xml.ipynb
214
+ @delegates(folder2ctx)
215
+ def repo2ctx(
216
+ owner:str, # GitHub repo owner or "owner/repo" or a full github URL
217
+ repo:str=None, # GitHub repo name (leave empty if using "owner/repo" or URL format for owner param)
218
+ ref:str=None, # Git ref (branch/tag/sha) (get from URL not provided); defaults to repo's default branch
219
+ folder:str=None, # Only include files under this path (get from URL not provided)
220
+ show_filters:bool=True, # Include filter info in title?
221
+ token:str=None, # GitHub token (uses GITHUB_TOKEN env var if None)
222
+ **kwargs # Passed to `folder2ctx`
223
+ )->Union[str,dict]: # XML for LM context, or dict of file sizes
224
+ "Convert GitHub repo to XML context without cloning"
225
+ import tempfile, tarfile, io
226
+ if owner.startswith('http'):
227
+ parsed = parse_gh_url(owner)
228
+ if not parsed: raise ValueError(f"Invalid GitHub URL: {owner}")
229
+ owner,repo = parsed['owner'], parsed['repo']
230
+ ref = ref or parsed.get('ref')
231
+ folder = folder or parsed.get('path')
232
+ if repo is None: owner, repo = owner.split('/')
233
+ api = GhApi(token=token)
234
+ if ref is None: ref = api.repos.get(owner, repo).default_branch
235
+ data = api.repos.download_tarball_archive(owner, repo, ref)
236
+ title = f"GitHub repository contents from {owner}/{repo}/{ref}"
237
+ if folder: title += f'/{folder}'
238
+ if show_filters:
239
+ parts = [f"{k}: {', '.join(v) if isinstance(v, (list,tuple)) else v}" for k,v in kwargs.items() if v]
240
+ if parts: title += f" (filters applied -- {' | '.join(parts)})"
241
+ tf = tarfile.open(fileobj=io.BytesIO(data))
242
+ with tempfile.TemporaryDirectory() as tmp:
243
+ tf.extractall(tmp, filter='data')
244
+ subdir = Path(tmp) / tf.getmembers()[0].name.split('/')[0]
245
+ if folder: subdir = subdir/folder
246
+ return folder2ctx(subdir, include_base=False, title=title, readme_first=True, **kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: toolslm
3
- Version: 0.3.9
3
+ Version: 0.3.17
4
4
  Summary: Tools to make language models a bit easier to use
5
5
  Home-page: https://github.com/AnswerDotAI/toolslm
6
6
  Author: Jeremy Howard
@@ -16,8 +16,9 @@ Classifier: License :: OSI Approved :: Apache Software License
16
16
  Requires-Python: >=3.9
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
- Requires-Dist: fastcore>=1.9.6
19
+ Requires-Dist: fastcore>=1.9.7
20
20
  Requires-Dist: httpx
21
+ Requires-Dist: ghapi
21
22
  Provides-Extra: dev
22
23
  Dynamic: author
23
24
  Dynamic: author-email
@@ -0,0 +1,13 @@
1
+ toolslm/__init__.py,sha256=HXhmv802-3PQnM5q29vlyrO10zGWKWZQ7xsx3qPYVRM,23
2
+ toolslm/_modidx.py,sha256=EC1pFuHb5MbfRMml7RXx1sxGXlTiczjUimXICuXUMn0,5806
3
+ toolslm/download.py,sha256=yMhyY3u26XRr6a4eZuCCmkprS7LQhHASl01Zn2B4q_o,4481
4
+ toolslm/funccall.py,sha256=_5TyhTjWaWLi-eJ96-4P3_faFv6Ft07nO60UjCF-bPU,11160
5
+ toolslm/md_hier.py,sha256=r_NPezhgfxjRmSYFlu_ND42hXt1qSbaPWHTcjbviOn4,11010
6
+ toolslm/shell.py,sha256=dGInuRKvexu21VmtZkw_0S3BGiTsbAongUG-yG4YHpc,1566
7
+ toolslm/xml.py,sha256=I2lRJPVG6us1g_gOTOKbbnZdlGB3g2-6MYGcLSqkFrM,11173
8
+ toolslm-0.3.17.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
9
+ toolslm-0.3.17.dist-info/METADATA,sha256=jSPVU5a6Qe0eYmuxTqj30o0YP81UgHOH_gIAOeeQDaE,2425
10
+ toolslm-0.3.17.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
+ toolslm-0.3.17.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
12
+ toolslm-0.3.17.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
13
+ toolslm-0.3.17.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- toolslm/__init__.py,sha256=xmkmdvq15kb61xdtCoa1YARnvHBnUgI-0GWIJYvHNeA,22
2
- toolslm/_modidx.py,sha256=kpgsDpj-Tvn90wezrHaMttyzhNcyNVgw_dQgK10qotI,5308
3
- toolslm/download.py,sha256=g3BxUSxylC_575M7RFSJ1GI3Co3EwPDdEeWzxaf2Czk,4451
4
- toolslm/funccall.py,sha256=0OBrx6KzI0KK13L-5Hn69yah9oZhgTsKchmMenCoT0A,10421
5
- toolslm/md_hier.py,sha256=r_NPezhgfxjRmSYFlu_ND42hXt1qSbaPWHTcjbviOn4,11010
6
- toolslm/shell.py,sha256=dGInuRKvexu21VmtZkw_0S3BGiTsbAongUG-yG4YHpc,1566
7
- toolslm/xml.py,sha256=tAHoqXrTRiX8i3pR-9KpHoBb8QXJ_TKEVyTEOPviudE,8095
8
- toolslm-0.3.9.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
9
- toolslm-0.3.9.dist-info/METADATA,sha256=djSwIqYu8Taj8g0yyXKw3IqFr_fbAKhbI3aQu14kv9U,2403
10
- toolslm-0.3.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
- toolslm-0.3.9.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
12
- toolslm-0.3.9.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
13
- toolslm-0.3.9.dist-info/RECORD,,