toolslm 0.3.7__py3-none-any.whl → 0.3.17__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
toolslm/__init__.py CHANGED
@@ -1 +1 @@
- __version__ = "0.3.7"
+ __version__ = "0.3.17"
toolslm/_modidx.py CHANGED
@@ -49,5 +49,10 @@ d = { 'settings': { 'branch': 'main',
  'toolslm.xml.mk_doc': ('xml.html#mk_doc', 'toolslm/xml.py'),
  'toolslm.xml.mk_doctype': ('xml.html#mk_doctype', 'toolslm/xml.py'),
  'toolslm.xml.nb2xml': ('xml.html#nb2xml', 'toolslm/xml.py'),
+ 'toolslm.xml.parse_gh_url': ('xml.html#parse_gh_url', 'toolslm/xml.py'),
  'toolslm.xml.read_file': ('xml.html#read_file', 'toolslm/xml.py'),
- 'toolslm.xml.repo2ctx': ('xml.html#repo2ctx', 'toolslm/xml.py')}}}
+ 'toolslm.xml.repo2ctx': ('xml.html#repo2ctx', 'toolslm/xml.py'),
+ 'toolslm.xml.sym2file': ('xml.html#sym2file', 'toolslm/xml.py'),
+ 'toolslm.xml.sym2folderctx': ('xml.html#sym2folderctx', 'toolslm/xml.py'),
+ 'toolslm.xml.sym2pkgctx': ('xml.html#sym2pkgctx', 'toolslm/xml.py'),
+ 'toolslm.xml.sym2pkgpath': ('xml.html#sym2pkgpath', 'toolslm/xml.py')}}}
toolslm/download.py CHANGED
@@ -8,6 +8,7 @@ from fastcore.utils import *
  from httpx import get
  from fastcore.meta import delegates
  from urllib.parse import urlparse, urljoin
+ from .xml import parse_gh_url
 
  # %% ../03_download.ipynb
  def clean_md(text, rm_comments=True, rm_details=True):
toolslm/funccall.py CHANGED
@@ -5,7 +5,7 @@ __all__ = ['empty', 'custom_types', 'get_schema', 'python', 'mk_ns', 'call_func'
             'mk_tool']
 
  # %% ../01_funccall.ipynb
- import inspect, json
+ import inspect, json, ast
  from collections import abc
  from fastcore.utils import *
  from fastcore.docments import docments
@@ -37,12 +37,20 @@ def _types(t:type)->tuple[str,Optional[str]]:
      else: return tmap.get(t.__name__, "object"), None
 
  # %% ../01_funccall.ipynb
- def _param(name, info):
-     "json schema parameter given `name` and `info` from docments full dict."
+ def _param(
+     name, # param name
+     info, # dict from docments
+     evalable=False): # stringify defaults that can't be literal_eval'd?
+     "json schema parameter given `name` and `info` from docments full dict"
      paramt,itemt = _types(info.anno)
      pschema = dict(type=paramt, description=info.docment or "")
      if itemt: pschema["items"] = {"type": itemt}
-     if info.default is not empty: pschema["default"] = info.default
+     if info.default is not empty:
+         if evalable:
+             try: ast.literal_eval(repr(info.default))
+             except: pschema["default"] = str(info.default)
+             else: pschema["default"] = info.default
+         else: pschema["default"] = info.default
      return pschema
 
  # %% ../01_funccall.ipynb
@@ -90,9 +98,9 @@ def _handle_container(origin, args, defs):
      return None
 
  # %% ../01_funccall.ipynb
- def _process_property(name, obj, props, req, defs):
+ def _process_property(name, obj, props, req, defs, evalable=False):
      "Process a single property of the schema"
-     p = _param(name, obj)
+     p = _param(name, obj, evalable=evalable)
      props[name] = p
      if obj.default is empty: req[name] = True
 
@@ -103,14 +111,14 @@ def _process_property(name, obj, props, req, defs):
        p.update(_handle_type(obj.anno, defs))
 
  # %% ../01_funccall.ipynb
- def _get_nested_schema(obj):
+ def _get_nested_schema(obj, evalable=False, skip_hidden=False):
      "Generate nested JSON schema for a class or function"
      d = docments(obj, full=True)
      props, req, defs = {}, {}, {}
 
      for n, o in d.items():
-         if n != 'return' and n != 'self':
-             _process_property(n, o, props, req, defs)
+         if n != 'return' and n != 'self' and not (skip_hidden and n.startswith('_')):
+             _process_property(n, o, props, req, defs, evalable=evalable)
 
      tkw = {}
      if isinstance(obj, type): tkw['title']=obj.__name__
@@ -120,10 +128,15 @@ def _get_nested_schema(obj):
      return schema
 
  # %% ../01_funccall.ipynb
- def get_schema(f:Union[callable,dict], pname='input_schema')->dict:
+ def get_schema(
+     f:Union[callable,dict], # Function to get schema for
+     pname='input_schema', # Key name for parameters
+     evalable=False, # stringify defaults that can't be literal_eval'd?
+     skip_hidden=False # skip parameters starting with '_'?
+     )->dict: # {'name':..., 'description':..., pname:...}
      "Generate JSON schema for a class, function, or method"
      if isinstance(f, dict): return f
-     schema = _get_nested_schema(f)
+     schema = _get_nested_schema(f, evalable=evalable, skip_hidden=skip_hidden)
      desc = f.__doc__
      assert desc, "Docstring missing!"
      d = docments(f, full=True)
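A quick illustration of the new `get_schema` flags. The `summarize` function below is a made-up example, not part of the package, and the exact schema layout is only sketched from the signatures above:

```python
from toolslm.funccall import get_schema

def summarize(
    text:str,           # text to summarize
    max_words:int=50,   # length limit
    _trace:bool=False): # internal flag, hidden from the schema
    "Summarize `text` in at most `max_words` words"
    ...

# skip_hidden=True drops parameters whose names start with '_' (here `_trace`);
# evalable=True stringifies any default that ast.literal_eval can't round-trip,
# leaving literal-friendly defaults (ints, bools, strings) untouched.
schema = get_schema(summarize, evalable=True, skip_hidden=True)
print(list(schema['input_schema']['properties']))  # expected: ['text', 'max_words']
```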
toolslm/xml.py CHANGED
@@ -2,10 +2,11 @@
 
  # %% auto 0
  __all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', 'nb2xml', 'mk_doctype', 'mk_doc', 'docs_xml',
-            'read_file', 'files2ctx', 'folder2ctx', 'repo2ctx', 'folder2ctx_cli']
+            'read_file', 'files2ctx', 'folder2ctx', 'sym2file', 'sym2folderctx', 'sym2pkgpath', 'sym2pkgctx',
+            'folder2ctx_cli', 'parse_gh_url', 'repo2ctx']
 
  # %% ../00_xml.ipynb
- import hashlib,xml.etree.ElementTree as ET
+ import hashlib, inspect, xml.etree.ElementTree as ET
  from collections import namedtuple
  from ghapi.all import GhApi
 
@@ -49,22 +50,23 @@ def cell2out(o):
      if hasattr(o, 'ename'): return Out(f"{o.ename}: {o.evalue}", type='error')
 
  # %% ../00_xml.ipynb
- def cell2xml(cell, out=True):
+ def cell2xml(cell, out=True, ids=True):
      "Convert notebook cell to concise XML format"
      src = ''.join(getattr(cell, 'source', ''))
      f = Code if cell.cell_type=='code' else Md
-     if not out: return f(src)
+     kw = dict(id=cell.id) if ids and hasattr(cell, 'id') else {}
+     if not out: return f(src, **kw)
      parts = [Source(src)]
      out_items = L(getattr(cell,'outputs',[])).map(cell2out).filter()
      if out_items: parts.append(Outs(*out_items))
-     return f(*parts)
+     return f(*parts, **kw)
 
  # %% ../00_xml.ipynb
- def nb2xml(fname=None, nb=None, out=True):
+ def nb2xml(fname=None, nb=None, out=True, ids=True):
      "Convert notebook to XML format"
      assert bool(fname)^bool(nb), "Pass either `fname` or `nb`"
      if not nb: nb = dict2obj(fname.read_json())
-     cells_xml = [to_xml(cell2xml(c, out=out), do_escape=False) for c in nb.cells if c.cell_type in ('code','markdown')]
+     cells_xml = [to_xml(cell2xml(c, out=out, ids=ids), do_escape=False) for c in nb.cells if c.cell_type in ('code','markdown')]
      return to_xml(Notebook(*cells_xml), do_escape=False)
 
  # %% ../00_xml.ipynb
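The `ids` flag added to `cell2xml`/`nb2xml` above can be exercised as follows; a minimal sketch, with a placeholder notebook path:

```python
from pathlib import Path
from toolslm.xml import nb2xml

nb = Path('example.ipynb')                 # placeholder path, not part of the package
xml_with_ids = nb2xml(fname=nb)            # ids=True is the new default: cell `id`s become attributes
xml_plain    = nb2xml(fname=nb, ids=False) # reproduces the pre-0.3.17 output
```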
@@ -101,68 +103,95 @@ def mk_doc(index:int, # The document index
  # %% ../00_xml.ipynb
  def docs_xml(docs:list[str], # The content of each document
               srcs:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided
-              prefix:bool=True, # Include Anthropic's suggested prose intro?
-              details:Optional[list]=None # Optional list of dicts with additional attrs for each doc
+              prefix:bool=False, # Include Anthropic's suggested prose intro?
+              details:Optional[list]=None, # Optional list of dicts with additional attrs for each doc
+              title:str=None # Optional title attr for Documents element
              )->str:
      "Create an XML string containing `docs` in Anthropic's recommended format"
      pre = 'Here are some documents for you to reference for your task:\n\n' if prefix else ''
      if srcs is None: srcs = [None]*len(docs)
      if details is None: details = [{}]*len(docs)
      docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)))
-     return pre + to_xml(Documents(docs), do_escape=False)
+     kw = dict(title=title) if title else {}
+     return pre + to_xml(Documents(*docs, **kw), do_escape=False)
 
  # %% ../00_xml.ipynb
- def read_file(fname, out=True):
+ def read_file(fname, out=True, max_size=None, ids=True):
      "Read file content, converting notebooks to XML if needed"
      fname = Path(fname)
-     if fname.suffix == '.ipynb': return nb2xml(fname, out=out)
-     return fname.read_text()
+     if fname.suffix == '.ipynb': res = nb2xml(fname, out=out, ids=ids)
+     else: res = fname.read_text()
+     if max_size and len(res)>max_size: return f"[Skipped: {fname.name} exceeds {max_size} bytes]"
+     return res
 
  # %% ../00_xml.ipynb
+ @delegates(docs_xml)
  def files2ctx(
      fnames:list[Union[str,Path]], # List of file names to add to context
-     prefix:bool=True, # Include Anthropic's suggested prose intro?
      out:bool=True, # Include notebook cell outputs?
-     srcs:Optional[list]=None # Use the labels instead of `fnames`
+     srcs:Optional[list]=None, # Use the labels instead of `fnames`
+     max_size:int=None, # Skip files larger than this (bytes)
+     ids:bool=True, # Include cell ids in notebooks?
+     **kwargs
      )->str: # XML for LM context
      "Convert files to XML context, handling notebooks"
      fnames = [Path(o) for o in fnames]
-     contents = [read_file(o, out=out) for o in fnames]
-     return docs_xml(contents, srcs or fnames, prefix=prefix)
+     contents = [read_file(o, out=out, max_size=max_size, ids=ids) for o in fnames]
+     return docs_xml(contents, srcs or fnames, **kwargs)
 
  # %% ../00_xml.ipynb
  @delegates(globtastic)
  def folder2ctx(
-     folder:Union[str,Path],
-     prefix:bool=True,
-     out:bool=True,
-     include_base:bool=True,
+     folder:Union[str,Path], # Folder to read
+     prefix:bool=False, # Include Anthropic's suggested prose intro?
+     out:bool=True, # Include notebook cell outputs?
+     include_base:bool=True, # Include full path in src?
+     title:str=None, # Optional title attr for Documents element
+     max_size:int=100_000, # Skip files larger than this (bytes)
+     max_total:int=10_000_000, # Max total output size in bytes
+     readme_first:bool=False, # Prioritize README files at start of context?
+     files_only:bool=False, # Return dict of {filename: size} instead of context?
+     ids:bool=True, # Include cell ids in notebooks?
      **kwargs
-     )->str:
+     )->Union[str,dict]:
      "Convert folder contents to XML context, handling notebooks"
      folder = Path(folder)
-     fnames = globtastic(folder, **kwargs)
-     srcs = fnames if include_base else [Path(f).relative_to(folder) for f in fnames]
-     return files2ctx(fnames, prefix=prefix, out=out, srcs=srcs)
+     fnames = pglob(folder, **kwargs)
+     if files_only: return {str(f.relative_to(folder)): f.stat().st_size for f in fnames}
+     if readme_first: fnames = sorted(fnames, key=lambda f: (0 if 'readme' in f.name.lower() else 1, f))
+     srcs = fnames if include_base else [f.relative_to(folder) for f in fnames]
+     res = files2ctx(fnames, prefix=prefix, out=out, srcs=srcs, title=title, max_size=max_size, ids=ids)
+     suf = f"\n\n[TRUNCATED: output size {{_outsz_}} exceeded max size {max_total} bytes]"
+     if max_total and len(res) > max_total: res = truncstr(res, max_total, suf=suf, sizevar='_outsz_')
+     return res
+
+ # %% ../00_xml.ipynb
+ def sym2file(sym):
+     "Return md string with filepath and contents for a symbol's source file"
+     f = Path(inspect.getfile(sym))
+     return f"- `{f}`\n\n````\n{f.read_text()}\n````"
 
  # %% ../00_xml.ipynb
  @delegates(folder2ctx)
- def repo2ctx(
-     owner:str, # GitHub repo owner
-     repo:str, # GitHub repo name
-     ref:str=None, # Git ref (branch/tag/sha); defaults to repo's default branch
-     **kwargs # Passed to `folder2ctx`
-     )->str: # XML for LM context
-     "Convert GitHub repo to XML context without cloning"
-     import tempfile, tarfile, io
-     api = GhApi()
-     if ref is None: ref = api.repos.get(owner, repo).default_branch
-     data = api.repos.download_tarball_archive(owner, repo, ref)
-     tf = tarfile.open(fileobj=io.BytesIO(data))
-     with tempfile.TemporaryDirectory() as tmp:
-         tf.extractall(tmp, filter='data')
-         subdir = Path(tmp) / tf.getmembers()[0].name.split('/')[0]
-         return folder2ctx(subdir, include_base=False, **kwargs)
+ def sym2folderctx(
+     sym,
+     types:str|list='py', # list or comma-separated str of ext types from: py, js, java, c, cpp, rb, r, ex, sh, web, doc, cfg
+     skip_file_re=r'^_mod',
+     **kwargs):
+     "Return folder context for a symbol's source file location"
+     return folder2ctx(Path(inspect.getfile(sym)).parent, types=types, skip_file_re=skip_file_re, **kwargs)
+
+ # %% ../00_xml.ipynb
+ def sym2pkgpath(sym):
+     "Get root package path for a symbol"
+     root = sym.__module__.split('.')[0]
+     return Path(sys.modules[root].__path__[0])
+
+ # %% ../00_xml.ipynb
+ @delegates(folder2ctx)
+ def sym2pkgctx(sym, types:str|list='py', skip_file_re=r'^_mod', **kwargs):
+     "Return repo context for a symbol's root package"
+     return folder2ctx(sym2pkgpath(sym), types=types, skip_file_re=skip_file_re, **kwargs)
 
  # %% ../00_xml.ipynb
  @call_parse
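The reworked `folder2ctx` and the new `sym2*` helpers above combine roughly as follows. A rough sketch with illustrative limits, assuming `toolslm` itself as the target package:

```python
import toolslm.xml as tx
from toolslm.xml import folder2ctx, sym2file, sym2pkgpath, sym2pkgctx

pkg = sym2pkgpath(tx.folder2ctx)    # root folder of the package defining the symbol

# files_only=True returns a {relative_path: size} dict instead of building XML.
sizes = folder2ctx(pkg, files_only=True)

# Full context: skip files over 50kB, cap total output at 1MB, READMEs first.
ctx = folder2ctx(pkg, max_size=50_000, max_total=1_000_000, readme_first=True)

# Markdown (path plus fenced contents) for the file defining a symbol,
# or XML context for that symbol's whole package.
md      = sym2file(tx.folder2ctx)
pkg_ctx = sym2pkgctx(tx.folder2ctx)
```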
@@ -174,3 +203,44 @@ def folder2ctx_cli(
      )->str: # XML for Claude context
      "CLI to convert folder contents to XML context, handling notebooks"
      print(folder2ctx(folder, out=out, **kwargs))
+
+ # %% ../00_xml.ipynb
+ def parse_gh_url(url):
+     "Parse GitHub URL into (owner, repo, type, ref, path) or None"
+     m = re.match(r'https?://(?:www\.)?github\.com/([^/]+)/([^/]+)(?:/([^/]+)(?:/([^/]+)(?:/(.+))?)?)?', url)
+     return dict(zip('owner repo typ ref path'.split(), m.groups())) if m else None
+
+ # %% ../00_xml.ipynb
+ @delegates(folder2ctx)
+ def repo2ctx(
+     owner:str, # GitHub repo owner or "owner/repo" or a full github URL
+     repo:str=None, # GitHub repo name (leave empty if using "owner/repo" or URL format for owner param)
+     ref:str=None, # Git ref (branch/tag/sha) (get from URL not provided); defaults to repo's default branch
+     folder:str=None, # Only include files under this path (get from URL not provided)
+     show_filters:bool=True, # Include filter info in title?
+     token:str=None, # GitHub token (uses GITHUB_TOKEN env var if None)
+     **kwargs # Passed to `folder2ctx`
+     )->Union[str,dict]: # XML for LM context, or dict of file sizes
+     "Convert GitHub repo to XML context without cloning"
+     import tempfile, tarfile, io
+     if owner.startswith('http'):
+         parsed = parse_gh_url(owner)
+         if not parsed: raise ValueError(f"Invalid GitHub URL: {owner}")
+         owner,repo = parsed['owner'], parsed['repo']
+         ref = ref or parsed.get('ref')
+         folder = folder or parsed.get('path')
+     if repo is None: owner, repo = owner.split('/')
+     api = GhApi(token=token)
+     if ref is None: ref = api.repos.get(owner, repo).default_branch
+     data = api.repos.download_tarball_archive(owner, repo, ref)
+     title = f"GitHub repository contents from {owner}/{repo}/{ref}"
+     if folder: title += f'/{folder}'
+     if show_filters:
+         parts = [f"{k}: {', '.join(v) if isinstance(v, (list,tuple)) else v}" for k,v in kwargs.items() if v]
+         if parts: title += f" (filters applied -- {' | '.join(parts)})"
+     tf = tarfile.open(fileobj=io.BytesIO(data))
+     with tempfile.TemporaryDirectory() as tmp:
+         tf.extractall(tmp, filter='data')
+         subdir = Path(tmp) / tf.getmembers()[0].name.split('/')[0]
+         if folder: subdir = subdir/folder
+         return folder2ctx(subdir, include_base=False, title=title, readme_first=True, **kwargs)
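The new `parse_gh_url`/`repo2ctx` pair accepts a full GitHub URL, an `owner/repo` string, or explicit arguments. A hedged sketch (network access is required, and GhApi picks up `GITHUB_TOKEN` when `token` is None):

```python
from toolslm.xml import parse_gh_url, repo2ctx

parse_gh_url('https://github.com/AnswerDotAI/toolslm/tree/main/toolslm')
# -> {'owner': 'AnswerDotAI', 'repo': 'toolslm', 'typ': 'tree', 'ref': 'main', 'path': 'toolslm'}

# Equivalent targets: explicit args, "owner/repo", or a URL (ref/folder taken from the URL).
ctx = repo2ctx('AnswerDotAI', 'toolslm', ref='main')
ctx = repo2ctx('AnswerDotAI/toolslm')
ctx = repo2ctx('https://github.com/AnswerDotAI/toolslm/tree/main/toolslm')

# Keyword args are forwarded to folder2ctx, so files_only=True returns {path: size} instead of XML.
sizes = repo2ctx('AnswerDotAI/toolslm', files_only=True)
```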
toolslm-{0.3.7 → 0.3.17}.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: toolslm
- Version: 0.3.7
+ Version: 0.3.17
  Summary: Tools to make language models a bit easier to use
  Home-page: https://github.com/AnswerDotAI/toolslm
  Author: Jeremy Howard
@@ -16,8 +16,9 @@ Classifier: License :: OSI Approved :: Apache Software License
  Requires-Python: >=3.9
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: fastcore>=1.9.6
+ Requires-Dist: fastcore>=1.9.7
  Requires-Dist: httpx
+ Requires-Dist: ghapi
  Provides-Extra: dev
  Dynamic: author
  Dynamic: author-email
toolslm-0.3.17.dist-info/RECORD ADDED
@@ -0,0 +1,13 @@
+ toolslm/__init__.py,sha256=HXhmv802-3PQnM5q29vlyrO10zGWKWZQ7xsx3qPYVRM,23
+ toolslm/_modidx.py,sha256=EC1pFuHb5MbfRMml7RXx1sxGXlTiczjUimXICuXUMn0,5806
+ toolslm/download.py,sha256=yMhyY3u26XRr6a4eZuCCmkprS7LQhHASl01Zn2B4q_o,4481
+ toolslm/funccall.py,sha256=_5TyhTjWaWLi-eJ96-4P3_faFv6Ft07nO60UjCF-bPU,11160
+ toolslm/md_hier.py,sha256=r_NPezhgfxjRmSYFlu_ND42hXt1qSbaPWHTcjbviOn4,11010
+ toolslm/shell.py,sha256=dGInuRKvexu21VmtZkw_0S3BGiTsbAongUG-yG4YHpc,1566
+ toolslm/xml.py,sha256=I2lRJPVG6us1g_gOTOKbbnZdlGB3g2-6MYGcLSqkFrM,11173
+ toolslm-0.3.17.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ toolslm-0.3.17.dist-info/METADATA,sha256=jSPVU5a6Qe0eYmuxTqj30o0YP81UgHOH_gIAOeeQDaE,2425
+ toolslm-0.3.17.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ toolslm-0.3.17.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
+ toolslm-0.3.17.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
+ toolslm-0.3.17.dist-info/RECORD,,
toolslm-0.3.7.dist-info/RECORD DELETED
@@ -1,13 +0,0 @@
- toolslm/__init__.py,sha256=J0I0c7-a50EOnWXMryTu_E6xhXSYFBPjVpeYP_a3vRI,22
- toolslm/_modidx.py,sha256=kpgsDpj-Tvn90wezrHaMttyzhNcyNVgw_dQgK10qotI,5308
- toolslm/download.py,sha256=g3BxUSxylC_575M7RFSJ1GI3Co3EwPDdEeWzxaf2Czk,4451
- toolslm/funccall.py,sha256=0OBrx6KzI0KK13L-5Hn69yah9oZhgTsKchmMenCoT0A,10421
- toolslm/md_hier.py,sha256=r_NPezhgfxjRmSYFlu_ND42hXt1qSbaPWHTcjbviOn4,11010
- toolslm/shell.py,sha256=dGInuRKvexu21VmtZkw_0S3BGiTsbAongUG-yG4YHpc,1566
- toolslm/xml.py,sha256=TO3i6QD1g_ya8B7Wxwib2ZWv7pwVpfyaAalw1qrKb74,7148
- toolslm-0.3.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- toolslm-0.3.7.dist-info/METADATA,sha256=IaRtHzIR_YzPP-XOAcO5EdSKWChZwgpSOz5HCdgGqGc,2403
- toolslm-0.3.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- toolslm-0.3.7.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
- toolslm-0.3.7.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
- toolslm-0.3.7.dist-info/RECORD,,