toolslm 0.3.16__tar.gz → 0.3.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: toolslm
3
- Version: 0.3.16
3
+ Version: 0.3.18
4
4
  Summary: Tools to make language models a bit easier to use
5
5
  Home-page: https://github.com/AnswerDotAI/toolslm
6
6
  Author: Jeremy Howard
@@ -1,7 +1,7 @@
1
1
  [DEFAULT]
2
2
  repo = toolslm
3
3
  lib_name = toolslm
4
- version = 0.3.16
4
+ version = 0.3.18
5
5
  min_python = 3.9
6
6
  license = apache2
7
7
  black_formatting = False
@@ -0,0 +1 @@
1
+ __version__ = "0.3.18"
@@ -44,12 +44,14 @@ d = { 'settings': { 'branch': 'main',
44
44
  'toolslm.xml.files2ctx': ('xml.html#files2ctx', 'toolslm/xml.py'),
45
45
  'toolslm.xml.folder2ctx': ('xml.html#folder2ctx', 'toolslm/xml.py'),
46
46
  'toolslm.xml.folder2ctx_cli': ('xml.html#folder2ctx_cli', 'toolslm/xml.py'),
47
+ 'toolslm.xml.get_docstring': ('xml.html#get_docstring', 'toolslm/xml.py'),
47
48
  'toolslm.xml.get_mime_text': ('xml.html#get_mime_text', 'toolslm/xml.py'),
48
49
  'toolslm.xml.json_to_xml': ('xml.html#json_to_xml', 'toolslm/xml.py'),
49
50
  'toolslm.xml.mk_doc': ('xml.html#mk_doc', 'toolslm/xml.py'),
50
51
  'toolslm.xml.mk_doctype': ('xml.html#mk_doctype', 'toolslm/xml.py'),
51
52
  'toolslm.xml.nb2xml': ('xml.html#nb2xml', 'toolslm/xml.py'),
52
53
  'toolslm.xml.parse_gh_url': ('xml.html#parse_gh_url', 'toolslm/xml.py'),
54
+ 'toolslm.xml.py2sigs': ('xml.html#py2sigs', 'toolslm/xml.py'),
53
55
  'toolslm.xml.read_file': ('xml.html#read_file', 'toolslm/xml.py'),
54
56
  'toolslm.xml.repo2ctx': ('xml.html#repo2ctx', 'toolslm/xml.py'),
55
57
  'toolslm.xml.sym2file': ('xml.html#sym2file', 'toolslm/xml.py'),
@@ -1,12 +1,12 @@
1
1
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../00_xml.ipynb.
2
2
 
3
3
  # %% auto 0
4
- __all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', 'nb2xml', 'mk_doctype', 'mk_doc', 'docs_xml',
5
- 'read_file', 'files2ctx', 'folder2ctx', 'sym2file', 'sym2folderctx', 'sym2pkgpath', 'sym2pkgctx',
6
- 'folder2ctx_cli', 'parse_gh_url', 'repo2ctx']
4
+ __all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', 'nb2xml', 'get_docstring', 'py2sigs', 'mk_doctype',
5
+ 'mk_doc', 'docs_xml', 'read_file', 'files2ctx', 'folder2ctx', 'sym2file', 'sym2folderctx', 'sym2pkgpath',
6
+ 'sym2pkgctx', 'folder2ctx_cli', 'parse_gh_url', 'repo2ctx']
7
7
 
8
8
  # %% ../00_xml.ipynb
9
- import hashlib, inspect, xml.etree.ElementTree as ET
9
+ import hashlib, inspect, xml.etree.ElementTree as ET, ast
10
10
  from collections import namedtuple
11
11
  from ghapi.all import GhApi
12
12
 
@@ -50,24 +50,47 @@ def cell2out(o):
50
50
  if hasattr(o, 'ename'): return Out(f"{o.ename}: {o.evalue}", type='error')
51
51
 
52
52
  # %% ../00_xml.ipynb
53
- def cell2xml(cell, out=True):
53
+ def cell2xml(cell, out=True, ids=True):
54
54
  "Convert notebook cell to concise XML format"
55
55
  src = ''.join(getattr(cell, 'source', ''))
56
56
  f = Code if cell.cell_type=='code' else Md
57
- if not out: return f(src)
57
+ kw = dict(id=cell.id) if ids and hasattr(cell, 'id') else {}
58
+ if not out: return f(src, **kw)
58
59
  parts = [Source(src)]
59
60
  out_items = L(getattr(cell,'outputs',[])).map(cell2out).filter()
60
61
  if out_items: parts.append(Outs(*out_items))
61
- return f(*parts)
62
+ return f(*parts, **kw)
62
63
 
63
64
  # %% ../00_xml.ipynb
64
- def nb2xml(fname=None, nb=None, out=True):
65
+ def nb2xml(fname=None, nb=None, out=True, ids=True):
65
66
  "Convert notebook to XML format"
66
67
  assert bool(fname)^bool(nb), "Pass either `fname` or `nb`"
67
68
  if not nb: nb = dict2obj(fname.read_json())
68
- cells_xml = [to_xml(cell2xml(c, out=out), do_escape=False) for c in nb.cells if c.cell_type in ('code','markdown')]
69
+ cells_xml = [to_xml(cell2xml(c, out=out, ids=ids), do_escape=False) for c in nb.cells if c.cell_type in ('code','markdown')]
69
70
  return to_xml(Notebook(*cells_xml), do_escape=False)
70
71
 
72
+ # %% ../00_xml.ipynb
73
+ def get_docstring(node, lines):
74
+ "Get docstring from source lines if present"
75
+ if not (node.body and isinstance(node.body[0], ast.Expr) and isinstance(node.body[0].value, ast.Constant)): return None
76
+ doc_node = node.body[0]
77
+ return '\n'.join(lines[doc_node.lineno-1:doc_node.end_lineno])
78
+
79
+ def py2sigs(fname=None, src=None):
80
+ "Return signature+docstring text for all functions and class methods in source"
81
+ if fname: src = Path(fname).read_text()
82
+ tree = ast.parse(src)
83
+ lines = src.splitlines()
84
+ res = []
85
+ for node in ast.walk(tree):
86
+ if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
87
+ body_start = max(node.body[0].lineno - 1, node.lineno)
88
+ sig = '\n'.join(lines[node.lineno-1:body_start])
89
+ doc = get_docstring(node, lines)
90
+ cts = f"{sig}\n{doc}" if doc else sig
91
+ res.append(cts.strip('\r\n'))
92
+ return '\n\n'.join(res)
93
+
71
94
  # %% ../00_xml.ipynb
72
95
  doctype = namedtuple('doctype', ['src', 'content'])
73
96
 
@@ -110,15 +133,16 @@ def docs_xml(docs:list[str], # The content of each document
110
133
  pre = 'Here are some documents for you to reference for your task:\n\n' if prefix else ''
111
134
  if srcs is None: srcs = [None]*len(docs)
112
135
  if details is None: details = [{}]*len(docs)
113
- docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)))
136
+ docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)) if d.strip())
114
137
  kw = dict(title=title) if title else {}
115
138
  return pre + to_xml(Documents(*docs, **kw), do_escape=False)
116
139
 
117
140
  # %% ../00_xml.ipynb
118
- def read_file(fname, out=True, max_size=None):
141
+ def read_file(fname, out=True, max_size=None, ids=True, sigs_only=False):
119
142
  "Read file content, converting notebooks to XML if needed"
120
143
  fname = Path(fname)
121
- if fname.suffix == '.ipynb': res = nb2xml(fname, out=out)
144
+ if fname.suffix == '.ipynb': res = nb2xml(fname, out=out, ids=ids)
145
+ elif fname.suffix == '.py' and sigs_only: res = py2sigs(fname)
122
146
  else: res = fname.read_text()
123
147
  if max_size and len(res)>max_size: return f"[Skipped: {fname.name} exceeds {max_size} bytes]"
124
148
  return res
@@ -130,11 +154,13 @@ def files2ctx(
130
154
  out:bool=True, # Include notebook cell outputs?
131
155
  srcs:Optional[list]=None, # Use the labels instead of `fnames`
132
156
  max_size:int=None, # Skip files larger than this (bytes)
157
+ ids:bool=True, # Include cell ids in notebooks?
158
+ sigs_only:bool=False, # For .py files, only include signatures and docstrings
133
159
  **kwargs
134
160
  )->str: # XML for LM context
135
161
  "Convert files to XML context, handling notebooks"
136
162
  fnames = [Path(o) for o in fnames]
137
- contents = [read_file(o, out=out, max_size=max_size) for o in fnames]
163
+ contents = [read_file(o, out=out, max_size=max_size, ids=ids, sigs_only=sigs_only) for o in fnames]
138
164
  return docs_xml(contents, srcs or fnames, **kwargs)
139
165
 
140
166
  # %% ../00_xml.ipynb
@@ -149,6 +175,8 @@ def folder2ctx(
149
175
  max_total:int=10_000_000, # Max total output size in bytes
150
176
  readme_first:bool=False, # Prioritize README files at start of context?
151
177
  files_only:bool=False, # Return dict of {filename: size} instead of context?
178
+ sigs_only:bool=False, # Return signatures instead of full text for python files?
179
+ ids:bool=True, # Include cell ids in notebooks?
152
180
  **kwargs
153
181
  )->Union[str,dict]:
154
182
  "Convert folder contents to XML context, handling notebooks"
@@ -157,7 +185,7 @@ def folder2ctx(
157
185
  if files_only: return {str(f.relative_to(folder)): f.stat().st_size for f in fnames}
158
186
  if readme_first: fnames = sorted(fnames, key=lambda f: (0 if 'readme' in f.name.lower() else 1, f))
159
187
  srcs = fnames if include_base else [f.relative_to(folder) for f in fnames]
160
- res = files2ctx(fnames, prefix=prefix, out=out, srcs=srcs, title=title, max_size=max_size)
188
+ res = files2ctx(fnames, prefix=prefix, out=out, srcs=srcs, title=title, max_size=max_size, sigs_only=sigs_only, ids=ids)
161
189
  suf = f"\n\n[TRUNCATED: output size {{_outsz_}} exceeded max size {max_total} bytes]"
162
190
  if max_total and len(res) > max_total: res = truncstr(res, max_total, suf=suf, sizevar='_outsz_')
163
191
  return res
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: toolslm
3
- Version: 0.3.16
3
+ Version: 0.3.18
4
4
  Summary: Tools to make language models a bit easier to use
5
5
  Home-page: https://github.com/AnswerDotAI/toolslm
6
6
  Author: Jeremy Howard
@@ -1 +0,0 @@
1
- __version__ = "0.3.16"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes