toolslm 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff covers the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
toolslm/__init__.py CHANGED
@@ -1 +1 @@
- __version__ = "0.0.5"
+ __version__ = "0.0.7"
toolslm/_modidx.py CHANGED
@@ -5,12 +5,28 @@ d = { 'settings': { 'branch': 'main',
  'doc_host': 'https://AnswerDotAI.github.io',
  'git_url': 'https://github.com/AnswerDotAI/toolslm',
  'lib_path': 'toolslm'},
- 'syms': { 'toolslm.funccall': { 'toolslm.funccall._copy_loc': ('funccall.html#_copy_loc', 'toolslm/funccall.py'),
+ 'syms': { 'toolslm.download': { 'toolslm.download._tryget': ('download.html#_tryget', 'toolslm/download.py'),
+ 'toolslm.download.clean_md': ('download.html#clean_md', 'toolslm/download.py'),
+ 'toolslm.download.find_docs': ('download.html#find_docs', 'toolslm/download.py'),
+ 'toolslm.download.get_llmstxt': ('download.html#get_llmstxt', 'toolslm/download.py'),
+ 'toolslm.download.html2md': ('download.html#html2md', 'toolslm/download.py'),
+ 'toolslm.download.read_docs': ('download.html#read_docs', 'toolslm/download.py'),
+ 'toolslm.download.read_html': ('download.html#read_html', 'toolslm/download.py'),
+ 'toolslm.download.read_md': ('download.html#read_md', 'toolslm/download.py'),
+ 'toolslm.download.split_url': ('download.html#split_url', 'toolslm/download.py')},
+ 'toolslm.funccall': { 'toolslm.funccall._copy_loc': ('funccall.html#_copy_loc', 'toolslm/funccall.py'),
+ 'toolslm.funccall._get_nested_schema': ('funccall.html#_get_nested_schema', 'toolslm/funccall.py'),
+ 'toolslm.funccall._handle_container': ('funccall.html#_handle_container', 'toolslm/funccall.py'),
+ 'toolslm.funccall._handle_type': ('funccall.html#_handle_type', 'toolslm/funccall.py'),
  'toolslm.funccall._param': ('funccall.html#_param', 'toolslm/funccall.py'),
+ 'toolslm.funccall._process_property': ('funccall.html#_process_property', 'toolslm/funccall.py'),
  'toolslm.funccall._run': ('funccall.html#_run', 'toolslm/funccall.py'),
  'toolslm.funccall._types': ('funccall.html#_types', 'toolslm/funccall.py'),
+ 'toolslm.funccall.call_func': ('funccall.html#call_func', 'toolslm/funccall.py'),
  'toolslm.funccall.get_schema': ('funccall.html#get_schema', 'toolslm/funccall.py'),
+ 'toolslm.funccall.mk_ns': ('funccall.html#mk_ns', 'toolslm/funccall.py'),
  'toolslm.funccall.python': ('funccall.html#python', 'toolslm/funccall.py')},
+ 'toolslm.md_hier': {},
  'toolslm.shell': { 'toolslm.shell.TerminalInteractiveShell.run_cell': ( 'shell.html#terminalinteractiveshell.run_cell',
  'toolslm/shell.py'),
  'toolslm.shell.get_shell': ('shell.html#get_shell', 'toolslm/shell.py')},
toolslm/download.py ADDED
@@ -0,0 +1,109 @@
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../03_download.ipynb.
+
+ # %% auto 0
+ __all__ = ['clean_md', 'read_md', 'html2md', 'read_html', 'get_llmstxt', 'split_url', 'find_docs', 'read_docs']
+
+ # %% ../03_download.ipynb 2
+ from fastcore.utils import *
+ from httpx import get
+ from fastcore.meta import delegates
+ from llms_txt import *
+
+ from html2text import HTML2Text
+ from bs4 import BeautifulSoup
+ from urllib.parse import urlparse, urljoin
+
+ # %% ../03_download.ipynb 4
+ def clean_md(text, rm_comments=True, rm_details=True):
+     "Remove comments and `<details>` sections from `text`"
+     if rm_comments: text = re.sub(r'\n?<!--.*?-->\n?', '', text, flags=re.DOTALL)
+     if rm_details: text = re.sub(r'\n?<details>.*?</details>\n?', '', text, flags=re.DOTALL)
+     return text
+
+ # %% ../03_download.ipynb 5
+ @delegates(get)
+ def read_md(url, rm_comments=True, rm_details=True, **kwargs):
+     "Read text from `url` and clean with `clean_md`"
+     return clean_md(get(url, **kwargs).text, rm_comments=rm_comments, rm_details=rm_details)
+
+ # %% ../03_download.ipynb 7
+ def html2md(s:str):
+     "Convert `s` from HTML to markdown"
+     o = HTML2Text(bodywidth=5000)
+     o.ignore_links = True
+     o.mark_code = True
+     o.ignore_images = True
+     return o.handle(s)
+
+ # %% ../03_download.ipynb 8
+ def read_html(url, # URL to read
+               sel=None, # Read only outerHTML of CSS selector `sel`
+               rm_comments=True, # Removes HTML comments
+               rm_details=True, # Removes `<details>` tags
+               multi=False, # Get all matches to `sel` or first one
+               wrap_tag=None, # If multi, each selection wrapped with <wrap_tag>content</wrap_tag>
+              ): # Cleaned markdown
+     "Get `url`, optionally selecting CSS selector `sel`, and convert to clean markdown"
+     page = get(url).text
+     if sel:
+         soup = BeautifulSoup(page, 'html.parser')
+         if multi:
+             page = [str(el) for el in soup.select(sel)]
+             if not wrap_tag: page = "\n".join(page)
+         else: page = str(soup.select_one(sel))
+     mds = map(lambda x: clean_md(html2md(x), rm_comments, rm_details=rm_details), tuplify(page))
+     if wrap_tag: return '\n'.join([f"\n<{wrap_tag}>\n{o}</{wrap_tag}>\n" for o in mds])
+     else: return '\n'.join(mds)
+
+
+ # %% ../03_download.ipynb 12
+ def get_llmstxt(url, optional=False, n_workers=None):
+     "Get llms.txt file from `url` and expand it with `llms_txt.create_ctx()`"
+     if not url.endswith('llms.txt'): return None
+     resp = get(url)
+     if resp.status_code!=200: return None
+     return create_ctx(resp.text, optional=optional, n_workers=n_workers)
+
+ # %% ../03_download.ipynb 14
+ def split_url(url):
+     "Split `url` into base, path, and file name, normalising the path to '/' if empty"
+     parsed = urlparse(url.strip('/'))
+     base = f"{parsed.scheme}://{parsed.netloc}"
+     path,spl,fname = parsed.path.rpartition('/')
+     fname = spl+fname
+     if not path and not fname: path='/'
+     return base,path,fname
+
+ # %% ../03_download.ipynb 16
+ def _tryget(url):
+     "Return `url` if requesting it doesn't give a 404, otherwise `None`"
+     res = get(url)
+     return None if res.status_code==404 else url
+
+ # %% ../03_download.ipynb 17
+ def find_docs(url):
+     "If available, return LLM-friendly llms.txt context or markdown file location from `url`"
+     base,path,fname = split_url(url)
+     url = (base+path+fname).strip('/')
+     if fname=='/llms.txt': return url
+     if Path(fname).suffix in('.md', '.txt', '.rst'): return _tryget(url)
+     if '.' in fname: return _tryget(url+'.md') or find_docs(url[:url.rfind('/')])
+     res = _tryget(url+'/llms.txt')
+     if res: return res
+     res = _tryget(url+'/index.md')
+     if res: return res
+     res = _tryget(url+'/index.html.md')
+     if res: return res
+     res = _tryget(url+'/index-commonmark.md')
+     if res: return res
+     parsed_url = urlparse(url)
+     if parsed_url.path == '/' or not parsed_url.path: return None
+     return find_docs(urljoin(url, '..'))
+
+ # %% ../03_download.ipynb 23
+ def read_docs(url, optional=False, n_workers=None, rm_comments=True, rm_details=True):
+     "If available, return LLM-friendly llms.txt context or markdown file response for `url`"
+     url = find_docs(url)
+     if url.endswith('/llms.txt'): res = get_llmstxt(url, optional=optional, n_workers=n_workers)
+     else: res = get(url).text
+     return clean_md(res, rm_comments=rm_comments, rm_details=rm_details)
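
For orientation, here is a minimal usage sketch of the new download helpers above. The URL is a hypothetical placeholder and fetching it requires network access and findable docs, so treat this as illustrative rather than part of the package:

    from toolslm.download import html2md, split_url, find_docs, read_docs

    # Convert an HTML fragment to markdown (links and images are dropped by html2md's settings)
    md = html2md("<h1>Title</h1><p>Some <a href='/x'>linked</a> text.</p>")

    # Split a URL into its base, path, and file name parts
    base, path, fname = split_url("https://docs.example.com/guide/intro.html")  # hypothetical URL

    # Locate an LLM-friendly source (llms.txt or a markdown variant) for a page
    doc_url = find_docs("https://docs.example.com/guide/intro.html")

    # read_docs combines the steps: find the docs location, download it, and clean the markdown
    ctx = read_docs("https://docs.example.com/guide/intro.html")
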
toolslm/funccall.py CHANGED
@@ -1,10 +1,11 @@
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../01_funccall.ipynb.

  # %% auto 0
- __all__ = ['empty', 'get_schema', 'python']
+ __all__ = ['empty', 'get_schema', 'python', 'mk_ns', 'call_func']

  # %% ../01_funccall.ipynb 2
  import inspect
+ from collections import abc
  from fastcore.utils import *
  from fastcore.docments import docments

@@ -16,10 +17,12 @@ def _types(t:type)->tuple[str,Optional[str]]:
      "Tuple of json schema type name and (if appropriate) array item name."
      if t is empty: raise TypeError('Missing type')
      tmap = {int:"integer", float:"number", str:"string", bool:"boolean", list:"array", dict:"object"}
-     if getattr(t, '__origin__', None) in (list,tuple): return "array", tmap.get(t.__args__[0], "object")
-     else: return tmap[t], None
+     tmap.update({k.__name__: v for k, v in tmap.items()})
+     if getattr(t, '__origin__', None) in (list,tuple): return "array", tmap.get(t.__args__[0].__name__, "object")
+     elif isinstance(t, str): return tmap.get(t, "object"), None
+     else: return tmap.get(t.__name__, "object"), None

- # %% ../01_funccall.ipynb 14
+ # %% ../01_funccall.ipynb 16
  def _param(name, info):
      "json schema parameter given `name` and `info` from docments full dict."
      paramt,itemt = _types(info.anno)
@@ -28,28 +31,76 @@ def _param(name, info):
      if info.default is not empty: pschema["default"] = info.default
      return pschema

- # %% ../01_funccall.ipynb 17
+ # %% ../01_funccall.ipynb 19
+ def _handle_type(t, defs):
+     "Handle a single type, creating nested schemas if necessary"
+     if isinstance(t, type) and not issubclass(t, (int, float, str, bool)):
+         defs[t.__name__] = _get_nested_schema(t)
+         return {'$ref': f'#/$defs/{t.__name__}'}
+     return {'type': _types(t)[0]}
+
+ # %% ../01_funccall.ipynb 20
+ def _handle_container(origin, args, defs):
+     "Handle container types like dict, list, tuple, set"
+     if origin is dict:
+         value_type = args[1].__args__[0] if hasattr(args[1], '__args__') else args[1]
+         return {
+             'type': 'object',
+             'additionalProperties': (
+                 {'type': 'array', 'items': _handle_type(value_type, defs)}
+                 if hasattr(args[1], '__origin__') else _handle_type(args[1], defs)
+             )
+         }
+     elif origin in (list, tuple, set):
+         schema = {'type': 'array', 'items': _handle_type(args[0], defs)}
+         if origin is set:
+             schema['uniqueItems'] = True
+         return schema
+     return None
+
+ # %% ../01_funccall.ipynb 21
+ def _process_property(name, obj, props, req, defs):
+     "Process a single property of the schema"
+     p = _param(name, obj)
+     props[name] = p
+     if obj.default is empty: req[name] = True
+
+     if hasattr(obj.anno, '__origin__'):
+         p.update(_handle_container(obj.anno.__origin__, obj.anno.__args__, defs))
+     else:
+         p.update(_handle_type(obj.anno, defs))
+
+ # %% ../01_funccall.ipynb 22
+ def _get_nested_schema(obj):
+     "Generate nested JSON schema for a class or function"
+     d = docments(obj, full=True)
+     props, req, defs = {}, {}, {}
+
+     for n, o in d.items():
+         if n != 'return' and n != 'self':
+             _process_property(n, o, props, req, defs)
+
+     schema = dict(type='object', properties=props, title=obj.__name__ if isinstance(obj, type) else None)
+     if req: schema['required'] = list(req)
+     if defs: schema['$defs'] = defs
+     return schema
+
+ # %% ../01_funccall.ipynb 26
  def get_schema(f:callable, pname='input_schema')->dict:
-     "Convert function `f` into a JSON schema `dict` for tool use."
-     d = docments(f, full=True)
-     ret = d.pop('return')
-     d.pop('self', None) # Ignore `self` for methods
-     paramd = {
-         'type': "object",
-         'properties': {n:_param(n,o) for n,o in d.items() if n[0]!='_'},
-         'required': [n for n,o in d.items() if o.default is empty and n[0]!='_']
-     }
+     "Generate JSON schema for a class, function, or method"
+     schema = _get_nested_schema(f)
      desc = f.__doc__
      assert desc, "Docstring missing!"
+     d = docments(f, full=True)
+     ret = d.pop('return')
      if ret.anno is not empty: desc += f'\n\nReturns:\n- type: {_types(ret.anno)[0]}'
-     if ret.docment: desc += f'\n- description: {ret.docment}'
-     return {'name':f.__name__, 'description':desc, pname:paramd}
+     return {"name": f.__name__, "description": desc, pname: schema}

- # %% ../01_funccall.ipynb 22
+ # %% ../01_funccall.ipynb 39
  import ast, time, signal, traceback
  from fastcore.utils import *

- # %% ../01_funccall.ipynb 23
+ # %% ../01_funccall.ipynb 40
  def _copy_loc(new, orig):
      "Copy location information from original node to new node and all children."
      new = ast.copy_location(new, orig)
@@ -58,7 +109,7 @@ def _copy_loc(new, orig):
          elif isinstance(o, list): setattr(new, field, [_copy_loc(value, orig) for value in o])
      return new

- # %% ../01_funccall.ipynb 25
+ # %% ../01_funccall.ipynb 42
  def _run(code:str ):
      "Run `code`, returning final expression (similar to IPython)"
      tree = ast.parse(code)
@@ -81,7 +132,7 @@ def _run(code:str ):
      if _result is not None: return _result
      return stdout_buffer.getvalue().strip()

- # %% ../01_funccall.ipynb 30
+ # %% ../01_funccall.ipynb 47
  def python(code, # Code to execute
             timeout=5 # Maximum run time in seconds before a `TimeoutError` is raised
            ): # Result of last node, if it's an expression, or `None` otherwise
@@ -93,3 +144,19 @@ def python(code, # Code to execute
      try: return _run(code)
      except Exception as e: return traceback.format_exc()
      finally: signal.alarm(0)
+
+ # %% ../01_funccall.ipynb 54
+ def mk_ns(*funcs_or_objs):
+     merged = {}
+     for o in funcs_or_objs:
+         if isinstance(o, type): merged |= {n:getattr(o,n) for n,m in o.__dict__.items() if isinstance(m, (staticmethod, classmethod))}
+         if isinstance(o, object): merged |= {n:getattr(o,n) for n, m in inspect.getmembers(o, inspect.ismethod)} | {n:m for n,m in o.__class__.__dict__.items() if isinstance(m, staticmethod)}
+         if callable(o) and hasattr(o, '__name__'): merged |= {o.__name__: o}
+     return merged
+
+ # %% ../01_funccall.ipynb 63
+ def call_func(fc_name, fc_inputs, ns):
+     "Call the function `fc_name` with the given `fc_inputs` using namespace `ns`."
+     if not isinstance(ns, abc.Mapping): ns = mk_ns(*ns)
+     func = ns[fc_name]
+     return func(**fc_inputs)
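
As a rough sketch of how the schema and dispatch pieces added above fit together for tool calling; the `adds` function is made up for the example:

    from toolslm.funccall import get_schema, mk_ns, call_func

    def adds(
        a: int,        # First operand
        b: int = 0     # Second operand
    ) -> int:          # Sum of the operands
        "Add two numbers together"
        return a + b

    schema = get_schema(adds)   # tool-use dict with name, description, and input_schema
    ns = mk_ns(adds)            # {'adds': adds}
    call_func('adds', {'a': 1, 'b': 2}, ns)  # returns 3
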
toolslm/md_hier.py ADDED
@@ -0,0 +1,136 @@
+ import re
+ from fastcore.utils import *
+ __all__ = ['markdown_to_dict', 'create_heading_dict']
+
+ def markdown_to_dict(markdown_content):
+     def clean_heading(text): return re.sub(r'[^A-Za-z0-9 ]+', '', text).strip()
+
+     lines = markdown_content.splitlines()
+     headings = []
+
+     # Parse headings with their levels and line numbers
+     for idx, line in enumerate(lines):
+         match = re.match(r'^(#{1,6})\s*(.*)', line)
+         if match:
+             level = len(match.group(1))
+             text = match.group(2).strip()
+             headings.append({'level': level, 'text': text, 'line': idx})
+
+     # Assign content to each heading, including subheadings
+     for i, h in enumerate(headings):
+         start = h['line']  # Include the heading line itself
+         # Find the end index: next heading of same or higher level
+         for j in range(i + 1, len(headings)):
+             if headings[j]['level'] <= h['level']:
+                 end = headings[j]['line']
+                 break
+         else: end = len(lines)
+         h['content'] = '\n'.join(lines[start:end]).strip()
+
+     # Build the dictionary with hierarchical keys
+     result,stack = {},[]
+     for h in headings:
+         stack = stack[:h['level'] - 1] + [clean_heading(h['text'])]
+         key = '.'.join(stack)
+         result[key] = h['content']
+     return dict2obj(result)
+
+ def create_heading_dict(text):
+     headings = re.findall(r'^#+.*', text, flags=re.MULTILINE)
+     result = {}
+     stack = [result]
+     prev_level = 0
+
+     for heading in headings:
+         level = heading.count('#')
+         title = heading.strip('#').strip()
+         while level <= prev_level:
+             stack.pop()
+             prev_level -= 1
+         new_dict = {}
+         stack[-1][title] = new_dict
+         stack.append(new_dict)
+         prev_level = level
+     return dict2obj(result)
+
+
+ if __name__=='__main__':
+     md_content = """
+ # User
+
+ This is the User section.
+
+ ## Tokens
+
+ Details about tokens.
+
+ ### Value
+
+ The value of tokens.
+
+ Some more details.
+
+ ## Settings
+
+ User settings information.
+
+ # Admin
+
+ Admin section.
+
+ ## Users
+
+ Admin users management.
+ """
+
+     result = markdown_to_dict(md_content)
+     #for key, value in result.items(): print(f'Key: {key}\nValue:\n{value}\n{"-"*40}')
+
+     def test_empty_content():
+         md_content = "# Empty Heading"
+         result = markdown_to_dict(md_content)
+         assert result['Empty Heading'] == '# Empty Heading'
+
+     def test_special_characters():
+         md_content = "# Heading *With* Special _Characters_!\nContent under heading."
+         result = markdown_to_dict(md_content)
+         assert 'Heading With Special Characters' in result
+         assert result['Heading With Special Characters'] == '# Heading *With* Special _Characters_!\nContent under heading.'
+
+     def test_duplicate_headings():
+         md_content = "# Duplicate\n## Duplicate\n### Duplicate\nContent under duplicate headings."
+         result = markdown_to_dict(md_content)
+         assert 'Duplicate' in result
+         assert 'Duplicate.Duplicate' in result
+         assert 'Duplicate.Duplicate.Duplicate' in result
+         assert result['Duplicate.Duplicate.Duplicate'] == '### Duplicate\nContent under duplicate headings.'
+
+     def test_no_content():
+         md_content = "# No Content Heading\n## Subheading"
+         result = markdown_to_dict(md_content)
+         assert result['No Content Heading'] == '# No Content Heading\n## Subheading'
+         assert result['No Content Heading.Subheading'] == '## Subheading'
+
+     def test_different_levels():
+         md_content = "### Level 3 Heading\nContent at level 3.\n# Level 1 Heading\nContent at level 1."
+         result = markdown_to_dict(md_content)
+         assert 'Level 3 Heading' in result
+         assert 'Level 1 Heading' in result
+         assert result['Level 3 Heading'] == '### Level 3 Heading\nContent at level 3.'
+         assert result['Level 1 Heading'] == '# Level 1 Heading\nContent at level 1.'
+
+     def test_parent_includes_subheadings():
+         md_content = "# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content."
+         result = markdown_to_dict(md_content)
+         assert result['Parent'] == '# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content.'
+         assert result['Parent.Child'] == '## Child\nChild content.\n### Grandchild\nGrandchild content.'
+         assert result['Parent.Child.Grandchild'] == '### Grandchild\nGrandchild content.'
+
+     test_empty_content()
+     test_special_characters()
+     test_duplicate_headings()
+     test_no_content()
+     test_different_levels()
+     test_parent_includes_subheadings()
+     print('tests passed')
+
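
A short sketch of how the two md_hier helpers behave on a small document; keys use dotted heading paths and values hold the full section text (the output in the comments is abbreviated):

    from toolslm.md_hier import markdown_to_dict, create_heading_dict

    md = "# User\nIntro text.\n## Tokens\nToken details."

    d = markdown_to_dict(md)
    # d['User']        -> '# User\nIntro text.\n## Tokens\nToken details.'
    # d['User.Tokens'] -> '## Tokens\nToken details.'

    h = create_heading_dict(md)
    # h -> {'User': {'Tokens': {}}}
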
toolslm/xml.py CHANGED
@@ -50,40 +50,32 @@ def mk_doctype(content:str, # The document content
      if source is None: source = hashlib.md5(content.encode()).hexdigest()[:8]
      return doctype(_add_nls(str(source).strip()), _add_nls(content.strip()))

- # %% ../00_xml.ipynb 17
+ # %% ../00_xml.ipynb 16
  def mk_doc(index:int, # The document index
             content:str, # The document content
-            source:Optional[str]=None # URL, filename, etc; defaults to `md5(content)` if not provided
-           ) -> tuple:
-     "Create an `ft` format tuple for a single doc in Anthropic's recommended format"
-     dt = mk_doctype(content, source)
-     content = ft('document_content', dt.content)
-     source = ft('source', dt.source)
-     return ft('document', source, content, index=index)
-
- # %% ../00_xml.ipynb 18
- def mk_doc(index:int, # The document index
-            content:str, # The document content
-            source:Optional[str]=None # URL, filename, etc; defaults to `md5(content)` if not provided
+            source:Optional[str]=None, # URL, filename, etc; defaults to `md5(content)` if not provided
+            **kwargs
            ) -> tuple:
      "Create an `ft` format tuple for a single doc in Anthropic's recommended format"
      dt = mk_doctype(content, source)
      content = Document_content(dt.content)
      source = Source(dt.source)
-     return Document(source, content, index=index)
+     return Document(source, content, index=index, **kwargs)

- # %% ../00_xml.ipynb 22
+ # %% ../00_xml.ipynb 19
  def docs_xml(docs:list[str], # The content of each document
               sources:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided
-              prefix:bool=True # Include Anthropic's suggested prose intro?
+              prefix:bool=True, # Include Anthropic's suggested prose intro?
+              details:Optional[list]=None # Optional list of dicts with additional attrs for each doc
              )->str:
      "Create an XML string containing `docs` in Anthropic's recommended format"
      pre = 'Here are some documents for you to reference for your task:\n\n' if prefix else ''
      if sources is None: sources = [None]*len(docs)
-     docs = (mk_doc(i+1, *o) for i,o in enumerate(zip(docs,sources)))
+     if details is None: details = [{}]*len(docs)
+     docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,sources,details)))
      return pre + to_xml(Documents(docs))

- # %% ../00_xml.ipynb 29
+ # %% ../00_xml.ipynb 26
  def files2ctx(
      fnames:list[Union[str,Path]], # List of file names to add to context
      prefix:bool=True # Include Anthropic's suggested prose intro?
@@ -92,7 +84,7 @@ def files2ctx(
      contents = [o.read_text() for o in fnames]
      return docs_xml(contents, fnames, prefix=prefix)

- # %% ../00_xml.ipynb 32
+ # %% ../00_xml.ipynb 29
  @delegates(globtastic)
  def folder2ctx(
      folder:Union[str,Path], # Folder name containing files to add to context
@@ -102,7 +94,7 @@ def folder2ctx(
      fnames = globtastic(folder, **kwargs)
      return files2ctx(fnames, prefix=prefix)

- # %% ../00_xml.ipynb 34
+ # %% ../00_xml.ipynb 31
  @call_parse
  @delegates(folder2ctx)
  def folder2ctx_cli(
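
A brief sketch of the new `details` argument to `docs_xml`, which forwards extra per-document attributes through `mk_doc` onto each `<document>` tag; the attribute names and the rendered output shown here are illustrative:

    from toolslm.xml import docs_xml

    ctx = docs_xml(docs=['First doc text', 'Second doc text'],
                   sources=['a.md', 'b.md'],
                   details=[{'title': 'First'}, {'title': 'Second'}])
    # Each <document> now carries its index plus the extra attributes,
    # e.g. <document index="1" title="First">...</document>
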
toolslm-0.0.7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: toolslm
- Version: 0.0.5
+ Version: 0.0.7
  Summary: Tools to make language models a bit easier to use
  Home-page: https://github.com/AnswerDotAI/toolslm
  Author: Jeremy Howard
@@ -17,6 +17,10 @@ Requires-Python: >=3.9
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: fastcore >=1.5.47
+ Requires-Dist: beautifulsoup4
+ Requires-Dist: html2text
+ Requires-Dist: httpx
+ Requires-Dist: llms-txt
  Provides-Extra: dev

  # toolslm
toolslm-0.0.7.dist-info/RECORD ADDED
@@ -0,0 +1,13 @@
+ toolslm/__init__.py,sha256=R9xOYoYrWKcfO5zvTeGC3m_eDNOvxMd8CocQs2tLufo,22
+ toolslm/_modidx.py,sha256=EIl2FBWhcZUS46r1AU0wURYg2O6Z3aXTPUr3p8Smrqk,3882
+ toolslm/download.py,sha256=tf0TGFzJ6qbxCjjuG9iRC2i6lutcF9GviWY0fJc_lSU,4378
+ toolslm/funccall.py,sha256=hSvBvfMv-YcBSUUs4-NrYu1f8jg4gfu2s82cPyIHVkU,6534
+ toolslm/md_hier.py,sha256=hkCjuOfIFWuMEiM2_XCoD9QIBjy9huLOSvpX_bMdn0Y,4645
+ toolslm/shell.py,sha256=GVqfL74NHw66zzZ7jvGVLjE55ZNJGBPvEb8kLz4aoYc,1576
+ toolslm/xml.py,sha256=Alcd96KfNO8LklVefyc51LbXBoVLRSgifrpMVZPqYsc,4120
+ toolslm-0.0.7.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ toolslm-0.0.7.dist-info/METADATA,sha256=sdRs3kCMl1xI8Z1if4xsGWuGaX9hbYGB0zs0BbRhQp0,3882
+ toolslm-0.0.7.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+ toolslm-0.0.7.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
+ toolslm-0.0.7.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
+ toolslm-0.0.7.dist-info/RECORD,,
toolslm-0.0.5.dist-info/RECORD DELETED
@@ -1,11 +0,0 @@
- toolslm/__init__.py,sha256=S7u1lbuWmM3A3ajykBialmPoJUK6Jg-WmNqM-9OZFdk,22
- toolslm/_modidx.py,sha256=6T36Q2cYKH0lp9Tt9Us8xpZV-Z0FYqrtZGu2ZykHDkg,2068
- toolslm/funccall.py,sha256=mzWNLdZY6cYk-I3O5noRiEB089mPwJhnRQFsS5_JYDs,3856
- toolslm/shell.py,sha256=GVqfL74NHw66zzZ7jvGVLjE55ZNJGBPvEb8kLz4aoYc,1576
- toolslm/xml.py,sha256=3rMyYK9VOvY3NElSNEoMGIe4iM8InKM-gbvjrK-2Ub0,4421
- toolslm-0.0.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- toolslm-0.0.5.dist-info/METADATA,sha256=cJmpD5wO6AO62izboiKw8KeifsGy9duhZnJW_IQSXDg,3782
- toolslm-0.0.5.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
- toolslm-0.0.5.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
- toolslm-0.0.5.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
- toolslm-0.0.5.dist-info/RECORD,,