toolslm 0.3.7__py3-none-any.whl → 0.3.17__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
toolslm/__init__.py CHANGED
@@ -1 +1 @@
- __version__ = "0.3.7"
+ __version__ = "0.3.17"
toolslm/_modidx.py CHANGED
@@ -49,5 +49,10 @@ d = { 'settings': { 'branch': 'main',
  'toolslm.xml.mk_doc': ('xml.html#mk_doc', 'toolslm/xml.py'),
  'toolslm.xml.mk_doctype': ('xml.html#mk_doctype', 'toolslm/xml.py'),
  'toolslm.xml.nb2xml': ('xml.html#nb2xml', 'toolslm/xml.py'),
+ 'toolslm.xml.parse_gh_url': ('xml.html#parse_gh_url', 'toolslm/xml.py'),
  'toolslm.xml.read_file': ('xml.html#read_file', 'toolslm/xml.py'),
- 'toolslm.xml.repo2ctx': ('xml.html#repo2ctx', 'toolslm/xml.py')}}}
+ 'toolslm.xml.repo2ctx': ('xml.html#repo2ctx', 'toolslm/xml.py'),
+ 'toolslm.xml.sym2file': ('xml.html#sym2file', 'toolslm/xml.py'),
+ 'toolslm.xml.sym2folderctx': ('xml.html#sym2folderctx', 'toolslm/xml.py'),
+ 'toolslm.xml.sym2pkgctx': ('xml.html#sym2pkgctx', 'toolslm/xml.py'),
+ 'toolslm.xml.sym2pkgpath': ('xml.html#sym2pkgpath', 'toolslm/xml.py')}}}
toolslm/download.py CHANGED
@@ -8,6 +8,7 @@ from fastcore.utils import *
  from httpx import get
  from fastcore.meta import delegates
  from urllib.parse import urlparse, urljoin
+ from .xml import parse_gh_url
 
  # %% ../03_download.ipynb
  def clean_md(text, rm_comments=True, rm_details=True):
toolslm/funccall.py CHANGED
@@ -5,7 +5,7 @@ __all__ = ['empty', 'custom_types', 'get_schema', 'python', 'mk_ns', 'call_func'
             'mk_tool']
 
  # %% ../01_funccall.ipynb
- import inspect, json
+ import inspect, json, ast
  from collections import abc
  from fastcore.utils import *
  from fastcore.docments import docments
@@ -37,12 +37,20 @@ def _types(t:type)->tuple[str,Optional[str]]:
      else: return tmap.get(t.__name__, "object"), None
 
  # %% ../01_funccall.ipynb
- def _param(name, info):
-     "json schema parameter given `name` and `info` from docments full dict."
+ def _param(
+     name, # param name
+     info, # dict from docments
+     evalable=False): # stringify defaults that can't be literal_eval'd?
+     "json schema parameter given `name` and `info` from docments full dict"
      paramt,itemt = _types(info.anno)
      pschema = dict(type=paramt, description=info.docment or "")
      if itemt: pschema["items"] = {"type": itemt}
-     if info.default is not empty: pschema["default"] = info.default
+     if info.default is not empty:
+         if evalable:
+             try: ast.literal_eval(repr(info.default))
+             except: pschema["default"] = str(info.default)
+             else: pschema["default"] = info.default
+         else: pschema["default"] = info.default
      return pschema
 
  # %% ../01_funccall.ipynb
@@ -90,9 +98,9 @@ def _handle_container(origin, args, defs):
      return None
 
  # %% ../01_funccall.ipynb
- def _process_property(name, obj, props, req, defs):
+ def _process_property(name, obj, props, req, defs, evalable=False):
      "Process a single property of the schema"
-     p = _param(name, obj)
+     p = _param(name, obj, evalable=evalable)
      props[name] = p
      if obj.default is empty: req[name] = True
 
@@ -103,14 +111,14 @@ def _process_property(name, obj, props, req, defs):
        p.update(_handle_type(obj.anno, defs))
 
  # %% ../01_funccall.ipynb
- def _get_nested_schema(obj):
+ def _get_nested_schema(obj, evalable=False, skip_hidden=False):
      "Generate nested JSON schema for a class or function"
      d = docments(obj, full=True)
      props, req, defs = {}, {}, {}
 
      for n, o in d.items():
-         if n != 'return' and n != 'self':
-             _process_property(n, o, props, req, defs)
+         if n != 'return' and n != 'self' and not (skip_hidden and n.startswith('_')):
+             _process_property(n, o, props, req, defs, evalable=evalable)
 
      tkw = {}
      if isinstance(obj, type): tkw['title']=obj.__name__
@@ -120,10 +128,15 @@ def _get_nested_schema(obj):
      return schema
 
  # %% ../01_funccall.ipynb
- def get_schema(f:Union[callable,dict], pname='input_schema')->dict:
+ def get_schema(
+     f:Union[callable,dict], # Function to get schema for
+     pname='input_schema', # Key name for parameters
+     evalable=False, # stringify defaults that can't be literal_eval'd?
+     skip_hidden=False # skip parameters starting with '_'?
+     )->dict: # {'name':..., 'description':..., pname:...}
      "Generate JSON schema for a class, function, or method"
      if isinstance(f, dict): return f
-     schema = _get_nested_schema(f)
+     schema = _get_nested_schema(f, evalable=evalable, skip_hidden=skip_hidden)
      desc = f.__doc__
      assert desc, "Docstring missing!"
      d = docments(f, full=True)
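A quick illustration of the new `get_schema` flags. The `summarize` function below is a made-up example, not part of the package, and the exact schema layout is only sketched from the signatures above:

```python
from toolslm.funccall import get_schema

def summarize(
    text:str,           # text to summarize
    max_words:int=50,   # length limit
    _trace:bool=False): # internal flag, hidden from the schema
    "Summarize `text` in at most `max_words` words"
    ...

# skip_hidden=True drops parameters whose names start with '_' (here `_trace`);
# evalable=True stringifies any default that ast.literal_eval can't round-trip,
# leaving literal-friendly defaults (ints, bools, strings) untouched.
schema = get_schema(summarize, evalable=True, skip_hidden=True)
print(list(schema['input_schema']['properties']))  # expected: ['text', 'max_words']
```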
toolslm/xml.py CHANGED
@@ -2,10 +2,11 @@
 
  # %% auto 0
  __all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', 'nb2xml', 'mk_doctype', 'mk_doc', 'docs_xml',
-            'read_file', 'files2ctx', 'folder2ctx', 'repo2ctx', 'folder2ctx_cli']
+            'read_file', 'files2ctx', 'folder2ctx', 'sym2file', 'sym2folderctx', 'sym2pkgpath', 'sym2pkgctx',
+            'folder2ctx_cli', 'parse_gh_url', 'repo2ctx']
 
  # %% ../00_xml.ipynb
- import hashlib,xml.etree.ElementTree as ET
+ import hashlib, inspect, xml.etree.ElementTree as ET
  from collections import namedtuple
  from ghapi.all import GhApi
 
@@ -49,22 +50,23 @@ def cell2out(o):
      if hasattr(o, 'ename'): return Out(f"{o.ename}: {o.evalue}", type='error')
 
  # %% ../00_xml.ipynb
- def cell2xml(cell, out=True):
+ def cell2xml(cell, out=True, ids=True):
      "Convert notebook cell to concise XML format"
      src = ''.join(getattr(cell, 'source', ''))
      f = Code if cell.cell_type=='code' else Md
-     if not out: return f(src)
+     kw = dict(id=cell.id) if ids and hasattr(cell, 'id') else {}
+     if not out: return f(src, **kw)
      parts = [Source(src)]
      out_items = L(getattr(cell,'outputs',[])).map(cell2out).filter()
      if out_items: parts.append(Outs(*out_items))
-     return f(*parts)
+     return f(*parts, **kw)
 
  # %% ../00_xml.ipynb
- def nb2xml(fname=None, nb=None, out=True):
+ def nb2xml(fname=None, nb=None, out=True, ids=True):
      "Convert notebook to XML format"
      assert bool(fname)^bool(nb), "Pass either `fname` or `nb`"
      if not nb: nb = dict2obj(fname.read_json())
-     cells_xml = [to_xml(cell2xml(c, out=out), do_escape=False) for c in nb.cells if c.cell_type in ('code','markdown')]
+     cells_xml = [to_xml(cell2xml(c, out=out, ids=ids), do_escape=False) for c in nb.cells if c.cell_type in ('code','markdown')]
      return to_xml(Notebook(*cells_xml), do_escape=False)
 
  # %% ../00_xml.ipynb
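The `ids` flag added to `cell2xml`/`nb2xml` above can be exercised as follows; a minimal sketch, with a placeholder notebook path:

```python
from pathlib import Path
from toolslm.xml import nb2xml

nb = Path('example.ipynb')                 # placeholder path, not part of the package
xml_with_ids = nb2xml(fname=nb)            # ids=True is the new default: cell `id`s become attributes
xml_plain    = nb2xml(fname=nb, ids=False) # reproduces the pre-0.3.17 output
```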
@@ -101,68 +103,95 @@ def mk_doc(index:int, # The document index
  # %% ../00_xml.ipynb
  def docs_xml(docs:list[str], # The content of each document
               srcs:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided
-              prefix:bool=True, # Include Anthropic's suggested prose intro?
-              details:Optional[list]=None # Optional list of dicts with additional attrs for each doc
+              prefix:bool=False, # Include Anthropic's suggested prose intro?
+              details:Optional[list]=None, # Optional list of dicts with additional attrs for each doc
+              title:str=None # Optional title attr for Documents element
              )->str:
      "Create an XML string containing `docs` in Anthropic's recommended format"
      pre = 'Here are some documents for you to reference for your task:\n\n' if prefix else ''
      if srcs is None: srcs = [None]*len(docs)
      if details is None: details = [{}]*len(docs)
      docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)))
-     return pre + to_xml(Documents(docs), do_escape=False)
+     kw = dict(title=title) if title else {}
+     return pre + to_xml(Documents(*docs, **kw), do_escape=False)
 
  # %% ../00_xml.ipynb
- def read_file(fname, out=True):
+ def read_file(fname, out=True, max_size=None, ids=True):
      "Read file content, converting notebooks to XML if needed"
      fname = Path(fname)
-     if fname.suffix == '.ipynb': return nb2xml(fname, out=out)
-     return fname.read_text()
+     if fname.suffix == '.ipynb': res = nb2xml(fname, out=out, ids=ids)
+     else: res = fname.read_text()
+     if max_size and len(res)>max_size: return f"[Skipped: {fname.name} exceeds {max_size} bytes]"
+     return res
 
  # %% ../00_xml.ipynb
+ @delegates(docs_xml)
  def files2ctx(
      fnames:list[Union[str,Path]], # List of file names to add to context
-     prefix:bool=True, # Include Anthropic's suggested prose intro?
      out:bool=True, # Include notebook cell outputs?
-     srcs:Optional[list]=None # Use the labels instead of `fnames`
+     srcs:Optional[list]=None, # Use the labels instead of `fnames`
+     max_size:int=None, # Skip files larger than this (bytes)
+     ids:bool=True, # Include cell ids in notebooks?
+     **kwargs
      )->str: # XML for LM context
      "Convert files to XML context, handling notebooks"
      fnames = [Path(o) for o in fnames]
-     contents = [read_file(o, out=out) for o in fnames]
-     return docs_xml(contents, srcs or fnames, prefix=prefix)
+     contents = [read_file(o, out=out, max_size=max_size, ids=ids) for o in fnames]
+     return docs_xml(contents, srcs or fnames, **kwargs)
 
  # %% ../00_xml.ipynb
  @delegates(globtastic)
  def folder2ctx(
-     folder:Union[str,Path],
-     prefix:bool=True,
-     out:bool=True,
-     include_base:bool=True,
+     folder:Union[str,Path], # Folder to read
+     prefix:bool=False, # Include Anthropic's suggested prose intro?
+     out:bool=True, # Include notebook cell outputs?
+     include_base:bool=True, # Include full path in src?
+     title:str=None, # Optional title attr for Documents element
+     max_size:int=100_000, # Skip files larger than this (bytes)
+     max_total:int=10_000_000, # Max total output size in bytes
+     readme_first:bool=False, # Prioritize README files at start of context?
+     files_only:bool=False, # Return dict of {filename: size} instead of context?
+     ids:bool=True, # Include cell ids in notebooks?
      **kwargs
-     )->str:
+     )->Union[str,dict]:
      "Convert folder contents to XML context, handling notebooks"
      folder = Path(folder)
-     fnames = globtastic(folder, **kwargs)
-     srcs = fnames if include_base else [Path(f).relative_to(folder) for f in fnames]
-     return files2ctx(fnames, prefix=prefix, out=out, srcs=srcs)
+     fnames = pglob(folder, **kwargs)
+     if files_only: return {str(f.relative_to(folder)): f.stat().st_size for f in fnames}
+     if readme_first: fnames = sorted(fnames, key=lambda f: (0 if 'readme' in f.name.lower() else 1, f))
+     srcs = fnames if include_base else [f.relative_to(folder) for f in fnames]
+     res = files2ctx(fnames, prefix=prefix, out=out, srcs=srcs, title=title, max_size=max_size, ids=ids)
+     suf = f"\n\n[TRUNCATED: output size {{_outsz_}} exceeded max size {max_total} bytes]"
+     if max_total and len(res) > max_total: res = truncstr(res, max_total, suf=suf, sizevar='_outsz_')
+     return res
+
+ # %% ../00_xml.ipynb
+ def sym2file(sym):
+     "Return md string with filepath and contents for a symbol's source file"
+     f = Path(inspect.getfile(sym))
+     return f"- `{f}`\n\n````\n{f.read_text()}\n````"
 
  # %% ../00_xml.ipynb
  @delegates(folder2ctx)
- def repo2ctx(
-     owner:str, # GitHub repo owner
-     repo:str, # GitHub repo name
-     ref:str=None, # Git ref (branch/tag/sha); defaults to repo's default branch
-     **kwargs # Passed to `folder2ctx`
-     )->str: # XML for LM context
-     "Convert GitHub repo to XML context without cloning"
-     import tempfile, tarfile, io
-     api = GhApi()
-     if ref is None: ref = api.repos.get(owner, repo).default_branch
-     data = api.repos.download_tarball_archive(owner, repo, ref)
-     tf = tarfile.open(fileobj=io.BytesIO(data))
-     with tempfile.TemporaryDirectory() as tmp:
-         tf.extractall(tmp, filter='data')
-         subdir = Path(tmp) / tf.getmembers()[0].name.split('/')[0]
-         return folder2ctx(subdir, include_base=False, **kwargs)
+ def sym2folderctx(
+     sym,
+     types:str|list='py', # list or comma-separated str of ext types from: py, js, java, c, cpp, rb, r, ex, sh, web, doc, cfg
+     skip_file_re=r'^_mod',
+     **kwargs):
+     "Return folder context for a symbol's source file location"
+     return folder2ctx(Path(inspect.getfile(sym)).parent, types=types, skip_file_re=skip_file_re, **kwargs)
+
+ # %% ../00_xml.ipynb
+ def sym2pkgpath(sym):
+     "Get root package path for a symbol"
+     root = sym.__module__.split('.')[0]
+     return Path(sys.modules[root].__path__[0])
+
+ # %% ../00_xml.ipynb
+ @delegates(folder2ctx)
+ def sym2pkgctx(sym, types:str|list='py', skip_file_re=r'^_mod', **kwargs):
+     "Return repo context for a symbol's root package"
+     return folder2ctx(sym2pkgpath(sym), types=types, skip_file_re=skip_file_re, **kwargs)
 
  # %% ../00_xml.ipynb
  @call_parse
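The reworked `folder2ctx` and the new `sym2*` helpers above combine roughly as follows. A rough sketch with illustrative limits, assuming `toolslm` itself as the target package:

```python
import toolslm.xml as tx
from toolslm.xml import folder2ctx, sym2file, sym2pkgpath, sym2pkgctx

pkg = sym2pkgpath(tx.folder2ctx)    # root folder of the package defining the symbol

# files_only=True returns a {relative_path: size} dict instead of building XML.
sizes = folder2ctx(pkg, files_only=True)

# Full context: skip files over 50kB, cap total output at 1MB, READMEs first.
ctx = folder2ctx(pkg, max_size=50_000, max_total=1_000_000, readme_first=True)

# Markdown (path plus fenced contents) for the file defining a symbol,
# or XML context for that symbol's whole package.
md      = sym2file(tx.folder2ctx)
pkg_ctx = sym2pkgctx(tx.folder2ctx)
```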
@@ -174,3 +203,44 @@ def folder2ctx_cli(
      )->str: # XML for Claude context
      "CLI to convert folder contents to XML context, handling notebooks"
      print(folder2ctx(folder, out=out, **kwargs))
+
+ # %% ../00_xml.ipynb
+ def parse_gh_url(url):
+     "Parse GitHub URL into (owner, repo, type, ref, path) or None"
+     m = re.match(r'https?://(?:www\.)?github\.com/([^/]+)/([^/]+)(?:/([^/]+)(?:/([^/]+)(?:/(.+))?)?)?', url)
+     return dict(zip('owner repo typ ref path'.split(), m.groups())) if m else None
+
+ # %% ../00_xml.ipynb
+ @delegates(folder2ctx)
+ def repo2ctx(
+     owner:str, # GitHub repo owner or "owner/repo" or a full github URL
+     repo:str=None, # GitHub repo name (leave empty if using "owner/repo" or URL format for owner param)
+     ref:str=None, # Git ref (branch/tag/sha) (get from URL not provided); defaults to repo's default branch
+     folder:str=None, # Only include files under this path (get from URL not provided)
+     show_filters:bool=True, # Include filter info in title?
+     token:str=None, # GitHub token (uses GITHUB_TOKEN env var if None)
+     **kwargs # Passed to `folder2ctx`
+     )->Union[str,dict]: # XML for LM context, or dict of file sizes
+     "Convert GitHub repo to XML context without cloning"
+     import tempfile, tarfile, io
+     if owner.startswith('http'):
+         parsed = parse_gh_url(owner)
+         if not parsed: raise ValueError(f"Invalid GitHub URL: {owner}")
+         owner,repo = parsed['owner'], parsed['repo']
+         ref = ref or parsed.get('ref')
+         folder = folder or parsed.get('path')
+     if repo is None: owner, repo = owner.split('/')
+     api = GhApi(token=token)
+     if ref is None: ref = api.repos.get(owner, repo).default_branch
+     data = api.repos.download_tarball_archive(owner, repo, ref)
+     title = f"GitHub repository contents from {owner}/{repo}/{ref}"
+     if folder: title += f'/{folder}'
+     if show_filters:
+         parts = [f"{k}: {', '.join(v) if isinstance(v, (list,tuple)) else v}" for k,v in kwargs.items() if v]
+         if parts: title += f" (filters applied -- {' | '.join(parts)})"
+     tf = tarfile.open(fileobj=io.BytesIO(data))
+     with tempfile.TemporaryDirectory() as tmp:
+         tf.extractall(tmp, filter='data')
+         subdir = Path(tmp) / tf.getmembers()[0].name.split('/')[0]
+         if folder: subdir = subdir/folder
+         return folder2ctx(subdir, include_base=False, title=title, readme_first=True, **kwargs)
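The new `parse_gh_url`/`repo2ctx` pair accepts a full GitHub URL, an `owner/repo` string, or explicit arguments. A hedged sketch (network access is required, and GhApi picks up `GITHUB_TOKEN` when `token` is None):

```python
from toolslm.xml import parse_gh_url, repo2ctx

parse_gh_url('https://github.com/AnswerDotAI/toolslm/tree/main/toolslm')
# -> {'owner': 'AnswerDotAI', 'repo': 'toolslm', 'typ': 'tree', 'ref': 'main', 'path': 'toolslm'}

# Equivalent targets: explicit args, "owner/repo", or a URL (ref/folder taken from the URL).
ctx = repo2ctx('AnswerDotAI', 'toolslm', ref='main')
ctx = repo2ctx('AnswerDotAI/toolslm')
ctx = repo2ctx('https://github.com/AnswerDotAI/toolslm/tree/main/toolslm')

# Keyword args are forwarded to folder2ctx, so files_only=True returns {path: size} instead of XML.
sizes = repo2ctx('AnswerDotAI/toolslm', files_only=True)
```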
toolslm-{0.3.7 → 0.3.17}.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: toolslm
- Version: 0.3.7
+ Version: 0.3.17
  Summary: Tools to make language models a bit easier to use
  Home-page: https://github.com/AnswerDotAI/toolslm
  Author: Jeremy Howard
@@ -16,8 +16,9 @@ Classifier: License :: OSI Approved :: Apache Software License
  Requires-Python: >=3.9
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: fastcore>=1.9.6
+ Requires-Dist: fastcore>=1.9.7
  Requires-Dist: httpx
+ Requires-Dist: ghapi
  Provides-Extra: dev
  Dynamic: author
  Dynamic: author-email
toolslm-0.3.17.dist-info/RECORD ADDED
@@ -0,0 +1,13 @@
+ toolslm/__init__.py,sha256=HXhmv802-3PQnM5q29vlyrO10zGWKWZQ7xsx3qPYVRM,23
+ toolslm/_modidx.py,sha256=EC1pFuHb5MbfRMml7RXx1sxGXlTiczjUimXICuXUMn0,5806
+ toolslm/download.py,sha256=yMhyY3u26XRr6a4eZuCCmkprS7LQhHASl01Zn2B4q_o,4481
+ toolslm/funccall.py,sha256=_5TyhTjWaWLi-eJ96-4P3_faFv6Ft07nO60UjCF-bPU,11160
+ toolslm/md_hier.py,sha256=r_NPezhgfxjRmSYFlu_ND42hXt1qSbaPWHTcjbviOn4,11010
+ toolslm/shell.py,sha256=dGInuRKvexu21VmtZkw_0S3BGiTsbAongUG-yG4YHpc,1566
+ toolslm/xml.py,sha256=I2lRJPVG6us1g_gOTOKbbnZdlGB3g2-6MYGcLSqkFrM,11173
+ toolslm-0.3.17.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ toolslm-0.3.17.dist-info/METADATA,sha256=jSPVU5a6Qe0eYmuxTqj30o0YP81UgHOH_gIAOeeQDaE,2425
+ toolslm-0.3.17.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ toolslm-0.3.17.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
+ toolslm-0.3.17.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
+ toolslm-0.3.17.dist-info/RECORD,,
toolslm-0.3.7.dist-info/RECORD DELETED
@@ -1,13 +0,0 @@
- toolslm/__init__.py,sha256=J0I0c7-a50EOnWXMryTu_E6xhXSYFBPjVpeYP_a3vRI,22
- toolslm/_modidx.py,sha256=kpgsDpj-Tvn90wezrHaMttyzhNcyNVgw_dQgK10qotI,5308
- toolslm/download.py,sha256=g3BxUSxylC_575M7RFSJ1GI3Co3EwPDdEeWzxaf2Czk,4451
- toolslm/funccall.py,sha256=0OBrx6KzI0KK13L-5Hn69yah9oZhgTsKchmMenCoT0A,10421
- toolslm/md_hier.py,sha256=r_NPezhgfxjRmSYFlu_ND42hXt1qSbaPWHTcjbviOn4,11010
- toolslm/shell.py,sha256=dGInuRKvexu21VmtZkw_0S3BGiTsbAongUG-yG4YHpc,1566
- toolslm/xml.py,sha256=TO3i6QD1g_ya8B7Wxwib2ZWv7pwVpfyaAalw1qrKb74,7148
- toolslm-0.3.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- toolslm-0.3.7.dist-info/METADATA,sha256=IaRtHzIR_YzPP-XOAcO5EdSKWChZwgpSOz5HCdgGqGc,2403
- toolslm-0.3.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- toolslm-0.3.7.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
- toolslm-0.3.7.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
- toolslm-0.3.7.dist-info/RECORD,,