PyPI - toolslm - Versions diffs - 0.3.17__tar.gz → 0.3.19__tar.gz - Mend

toolslm 0.3.17.tar.gz → 0.3.19.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

{toolslm-0.3.17/toolslm.egg-info → toolslm-0.3.19}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: toolslm
-Version: 0.3.17
+Version: 0.3.19
 Summary: Tools to make language models a bit easier to use
 Home-page: https://github.com/AnswerDotAI/toolslm
 Author: Jeremy Howard

{toolslm-0.3.17 → toolslm-0.3.19}/settings.ini RENAMED Viewed

@@ -1,7 +1,7 @@
 [DEFAULT]
 repo = toolslm
 lib_name = toolslm
-version = 0.3.17
+version = 0.3.19
 min_python = 3.9
 license = apache2
 black_formatting = False

toolslm-0.3.19/toolslm/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "0.3.19"

{toolslm-0.3.17 → toolslm-0.3.19}/toolslm/_modidx.py RENAMED Viewed

@@ -40,16 +40,19 @@ d = { 'settings': { 'branch': 'main',
             'toolslm.xml': { 'toolslm.xml._add_nls': ('xml.html#_add_nls', 'toolslm/xml.py'),
                              'toolslm.xml.cell2out': ('xml.html#cell2out', 'toolslm/xml.py'),
                              'toolslm.xml.cell2xml': ('xml.html#cell2xml', 'toolslm/xml.py'),
+                             'toolslm.xml.cells2xml': ('xml.html#cells2xml', 'toolslm/xml.py'),
                              'toolslm.xml.docs_xml': ('xml.html#docs_xml', 'toolslm/xml.py'),
                              'toolslm.xml.files2ctx': ('xml.html#files2ctx', 'toolslm/xml.py'),
                              'toolslm.xml.folder2ctx': ('xml.html#folder2ctx', 'toolslm/xml.py'),
                              'toolslm.xml.folder2ctx_cli': ('xml.html#folder2ctx_cli', 'toolslm/xml.py'),
+                             'toolslm.xml.get_docstring': ('xml.html#get_docstring', 'toolslm/xml.py'),
                              'toolslm.xml.get_mime_text': ('xml.html#get_mime_text', 'toolslm/xml.py'),
                              'toolslm.xml.json_to_xml': ('xml.html#json_to_xml', 'toolslm/xml.py'),
                              'toolslm.xml.mk_doc': ('xml.html#mk_doc', 'toolslm/xml.py'),
                              'toolslm.xml.mk_doctype': ('xml.html#mk_doctype', 'toolslm/xml.py'),
                              'toolslm.xml.nb2xml': ('xml.html#nb2xml', 'toolslm/xml.py'),
                              'toolslm.xml.parse_gh_url': ('xml.html#parse_gh_url', 'toolslm/xml.py'),
+                             'toolslm.xml.py2sigs': ('xml.html#py2sigs', 'toolslm/xml.py'),
                              'toolslm.xml.read_file': ('xml.html#read_file', 'toolslm/xml.py'),
                              'toolslm.xml.repo2ctx': ('xml.html#repo2ctx', 'toolslm/xml.py'),
                              'toolslm.xml.sym2file': ('xml.html#sym2file', 'toolslm/xml.py'),

{toolslm-0.3.17 → toolslm-0.3.19}/toolslm/xml.py RENAMED Viewed

@@ -1,19 +1,19 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../00_xml.ipynb.
 # %% auto 0
-__all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', 'nb2xml', 'mk_doctype', 'mk_doc', 'docs_xml',
-           'read_file', 'files2ctx', 'folder2ctx', 'sym2file', 'sym2folderctx', 'sym2pkgpath', 'sym2pkgctx',
-           'folder2ctx_cli', 'parse_gh_url', 'repo2ctx']
+__all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', 'cells2xml', 'nb2xml', 'get_docstring', 'py2sigs',
+           'mk_doctype', 'mk_doc', 'docs_xml', 'read_file', 'files2ctx', 'folder2ctx', 'sym2file', 'sym2folderctx',
+           'sym2pkgpath', 'sym2pkgctx', 'folder2ctx_cli', 'parse_gh_url', 'repo2ctx']
 # %% ../00_xml.ipynb
-import hashlib, inspect, xml.etree.ElementTree as ET
+import hashlib, inspect, xml.etree.ElementTree as ET, ast
 from collections import namedtuple
 from ghapi.all import GhApi
 from fastcore.utils import *
 from fastcore.meta import delegates
 from fastcore.xtras import hl_md
-from fastcore.xml import to_xml, Document, Documents, Document_content, Src, Source,Out,Outs,Cell,Notebook,Md,Code
+from fastcore.xml import to_xml, Document, Documents, Document_content, Src, Source,Out,Outs,Cell,Notebook,Md,Code,Raw
 from fastcore.script import call_parse
 # %% ../00_xml.ipynb
@@ -50,10 +50,13 @@ def cell2out(o):
     if hasattr(o, 'ename'): return Out(f"{o.ename}: {o.evalue}", type='error')
 # %% ../00_xml.ipynb
-def cell2xml(cell, out=True, ids=True):
+_ctfuns = {'code': Code, 'markdown': Md, 'raw': Raw}
+def cell2xml(cell, out=True, ids=True, nums=False):
     "Convert notebook cell to concise XML format"
     src = ''.join(getattr(cell, 'source', ''))
-    f = Code if cell.cell_type=='code' else Md
+    if nums: src = '\n'.join(f'{i+1:6d} │ {l}' for i,l in enumerate(src.splitlines()))
+    f = _ctfuns[cell.cell_type]
     kw = dict(id=cell.id) if ids and hasattr(cell, 'id') else {}
     if not out: return f(src, **kw)
     parts = [Source(src)]
@@ -62,12 +65,40 @@ def cell2xml(cell, out=True, ids=True):
     return f(*parts, **kw)
 # %% ../00_xml.ipynb
-def nb2xml(fname=None, nb=None, out=True, ids=True):
+@delegates(cell2xml)
+def cells2xml(cells, wrap=Notebook, **kwargs):
+    "Convert notebook to XML format"
+    res = [cell2xml(c, **kwargs) for c in cells]
+    return to_xml(wrap(*res), do_escape=False)
+@delegates(cell2xml)
+def nb2xml(fname=None, nb=None, **kwargs):
     "Convert notebook to XML format"
     assert bool(fname)^bool(nb), "Pass either `fname` or `nb`"
     if not nb: nb = dict2obj(fname.read_json())
-    cells_xml = [to_xml(cell2xml(c, out=out, ids=ids), do_escape=False) for c in nb.cells if c.cell_type in ('code','markdown')]
-    return to_xml(Notebook(*cells_xml), do_escape=False)
+    return cells2xml(nb.cells, **kwargs)
+# %% ../00_xml.ipynb
+def get_docstring(node, lines):
+    "Get docstring from source lines if present"
+    if not (node.body and isinstance(node.body[0], ast.Expr) and isinstance(node.body[0].value, ast.Constant)): return None
+    doc_node = node.body[0]
+    return '\n'.join(lines[doc_node.lineno-1:doc_node.end_lineno])
+def py2sigs(fname=None, src=None):
+    "Return signature+docstring text for all functions and class methods in source"
+    if fname: src = Path(fname).expanduser().read_text()
+    tree = ast.parse(src)
+    lines = src.splitlines()
+    res = []
+    for node in ast.walk(tree):
+        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+            body_start = max(node.body[0].lineno - 1, node.lineno)
+            sig = '\n'.join(lines[node.lineno-1:body_start])
+            doc = get_docstring(node, lines)
+            cts = f"{sig}\n{doc}" if doc else sig
+            res.append(cts.strip('\r\n'))
+    return '\n\n'.join(res)
 # %% ../00_xml.ipynb
 doctype = namedtuple('doctype', ['src', 'content'])
@@ -111,15 +142,17 @@ def docs_xml(docs:list[str],  # The content of each document
     pre = 'Here are some documents for you to reference for your task:\n\n' if prefix else ''
     if srcs is None: srcs = [None]*len(docs)
     if details is None: details = [{}]*len(docs)
-    docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)))
+    docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)) if d.strip())
     kw = dict(title=title) if title else {}
     return pre + to_xml(Documents(*docs, **kw), do_escape=False)
 # %% ../00_xml.ipynb
-def read_file(fname, out=True, max_size=None, ids=True):
+@delegates(nb2xml)
+def read_file(fname, max_size=None, sigs_only=False, **kwargs):
     "Read file content, converting notebooks to XML if needed"
-    fname = Path(fname)
-    if fname.suffix == '.ipynb': res = nb2xml(fname, out=out, ids=ids)
+    fname = Path(fname).expanduser()
+    if fname.suffix == '.ipynb': res = nb2xml(fname, **kwargs)
+    elif fname.suffix == '.py' and sigs_only: res = py2sigs(fname)
     else: res = fname.read_text()
     if max_size and len(res)>max_size: return f"[Skipped: {fname.name} exceeds {max_size} bytes]"
     return res
@@ -128,15 +161,17 @@ def read_file(fname, out=True, max_size=None, ids=True):
 @delegates(docs_xml)
 def files2ctx(
     fnames:list[Union[str,Path]], # List of file names to add to context
-    out:bool=True, # Include notebook cell outputs?
     srcs:Optional[list]=None, # Use the labels instead of `fnames`
     max_size:int=None, # Skip files larger than this (bytes)
+    out:bool=True, # Include notebook cell outputs?
     ids:bool=True,  # Include cell ids in notebooks?
+    nums:bool=False, # Include line numbers in notebook cell source?
+    sigs_only:bool=False, # For .py files, only include signatures and docstrings
     **kwargs
 )->str: # XML for LM context
     "Convert files to XML context, handling notebooks"
-    fnames = [Path(o) for o in fnames]
-    contents = [read_file(o, out=out, max_size=max_size, ids=ids) for o in fnames]
+    fnames = [Path(o).expanduser() for o in listify(fnames)]
+    contents = [read_file(o, max_size=max_size, out=out, ids=ids, sigs_only=sigs_only, nums=nums) for o in fnames]
     return docs_xml(contents, srcs or fnames, **kwargs)
 # %% ../00_xml.ipynb
@@ -151,16 +186,17 @@ def folder2ctx(
     max_total:int=10_000_000,  # Max total output size in bytes
     readme_first:bool=False,  # Prioritize README files at start of context?
     files_only:bool=False,  # Return dict of {filename: size} instead of context?
+    sigs_only:bool=False,  # Return signatures instead of full text for python files?
     ids:bool=True,  # Include cell ids in notebooks?
     **kwargs
 )->Union[str,dict]:
     "Convert folder contents to XML context, handling notebooks"
-    folder = Path(folder)
+    folder = Path(folder).expanduser()
     fnames = pglob(folder, **kwargs)
     if files_only: return {str(f.relative_to(folder)): f.stat().st_size for f in fnames}
     if readme_first: fnames = sorted(fnames, key=lambda f: (0 if 'readme' in f.name.lower() else 1, f))
     srcs = fnames if include_base else [f.relative_to(folder) for f in fnames]
-    res = files2ctx(fnames, prefix=prefix, out=out, srcs=srcs, title=title, max_size=max_size, ids=ids)
+    res = files2ctx(fnames, prefix=prefix, out=out, srcs=srcs, title=title, max_size=max_size, sigs_only=sigs_only, ids=ids)
     suf = f"\n\n[TRUNCATED: output size {{_outsz_}} exceeded max size {max_total} bytes]"
     if max_total and len(res) > max_total: res = truncstr(res, max_total, suf=suf, sizevar='_outsz_')
     return res

{toolslm-0.3.17 → toolslm-0.3.19/toolslm.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: toolslm
-Version: 0.3.17
+Version: 0.3.19
 Summary: Tools to make language models a bit easier to use
 Home-page: https://github.com/AnswerDotAI/toolslm
 Author: Jeremy Howard