toolslm 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
toolslm/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.0.4"
1
+ __version__ = "0.0.6"
toolslm/_modidx.py CHANGED
@@ -5,7 +5,16 @@ d = { 'settings': { 'branch': 'main',
5
5
  'doc_host': 'https://AnswerDotAI.github.io',
6
6
  'git_url': 'https://github.com/AnswerDotAI/toolslm',
7
7
  'lib_path': 'toolslm'},
8
- 'syms': { 'toolslm.funccall': { 'toolslm.funccall._copy_loc': ('funccall.html#_copy_loc', 'toolslm/funccall.py'),
8
+ 'syms': { 'toolslm.download': { 'toolslm.download._tryget': ('download.html#_tryget', 'toolslm/download.py'),
9
+ 'toolslm.download.clean_md': ('download.html#clean_md', 'toolslm/download.py'),
10
+ 'toolslm.download.find_docs': ('download.html#find_docs', 'toolslm/download.py'),
11
+ 'toolslm.download.get_llmstxt': ('download.html#get_llmstxt', 'toolslm/download.py'),
12
+ 'toolslm.download.html2md': ('download.html#html2md', 'toolslm/download.py'),
13
+ 'toolslm.download.read_docs': ('download.html#read_docs', 'toolslm/download.py'),
14
+ 'toolslm.download.read_html': ('download.html#read_html', 'toolslm/download.py'),
15
+ 'toolslm.download.read_md': ('download.html#read_md', 'toolslm/download.py'),
16
+ 'toolslm.download.split_url': ('download.html#split_url', 'toolslm/download.py')},
17
+ 'toolslm.funccall': { 'toolslm.funccall._copy_loc': ('funccall.html#_copy_loc', 'toolslm/funccall.py'),
9
18
  'toolslm.funccall._param': ('funccall.html#_param', 'toolslm/funccall.py'),
10
19
  'toolslm.funccall._run': ('funccall.html#_run', 'toolslm/funccall.py'),
11
20
  'toolslm.funccall._types': ('funccall.html#_types', 'toolslm/funccall.py'),
toolslm/download.py ADDED
@@ -0,0 +1,97 @@
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../03_download.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['clean_md', 'read_md', 'html2md', 'read_html', 'get_llmstxt', 'split_url', 'find_docs', 'read_docs']
5
+
6
+ # %% ../03_download.ipynb 2
7
+ from fastcore.utils import *
8
+ from httpx import get
9
+ from fastcore.meta import delegates
10
+ from llms_txt import *
11
+
12
+ from html2text import HTML2Text
13
+ from bs4 import BeautifulSoup
14
+ from urllib.parse import urlparse, urljoin
15
+
16
+ # %% ../03_download.ipynb 4
17
def clean_md(text, # Text to clean
             rm_comments=True, # Remove `<!-- ... -->` comments?
             rm_details=True # Remove `<details>...</details>` sections?
            ): # `text` with the selected sections removed
    "Remove comments and `<details>` sections from `text`"
    # DOTALL lets `.` span newlines; one optional newline on each side is removed along with the match
    if rm_comments: text = re.sub(r'\n?<!--.*?-->\n?', '', text, flags=re.DOTALL)
    # `(?:\s[^>]*)?` also accepts opening tags that carry attributes, e.g. `<details open>`
    if rm_details: text = re.sub(r'\n?<details(?:\s[^>]*)?>.*?</details>\n?', '', text, flags=re.DOTALL)
    return text
22
+
23
+ # %% ../03_download.ipynb 5
24
@delegates(get)
def read_md(url, # URL of the document to fetch
            rm_comments=True, # Remove `<!-- ... -->` comments?
            rm_details=True, # Remove `<details>` sections?
            **kwargs # Passed through to `httpx.get`
           ): # Cleaned text of the response body
    "Read text from `url` and clean with `clean_md`"
    return clean_md(get(url, **kwargs).text, rm_comments=rm_comments, rm_details=rm_details)
28
+
29
+ # %% ../03_download.ipynb 7
30
def html2md(s:str):
    "Convert the HTML string `s` to markdown"
    converter = HTML2Text(bodywidth=5000)
    # Switch on the three converter options used by this module
    for flag in ('ignore_links', 'mark_code', 'ignore_images'): setattr(converter, flag, True)
    return converter.handle(s)
37
+
38
+ # %% ../03_download.ipynb 8
39
def read_html(url, # URL of the HTML page to fetch
              sel=None, # Optional CSS selector; only the first matching element is converted
              rm_comments=True, # Remove `<!-- ... -->` comments from the markdown?
              rm_details=True # Remove `<details>` sections from the markdown?
             ): # Cleaned markdown
    "Get `url`, optionally selecting CSS selector `sel`, and convert to clean markdown"
    page = get(url).text
    if sel:
        soup = BeautifulSoup(page, 'html.parser')
        # `select_one` takes a CSS selector ('#id', '.cls', 'div > p'); `find` would treat `sel` as a tag name
        el = soup.select_one(sel)
        # No match: convert an empty page instead of the literal text 'None'
        page = '' if el is None else str(el)
    md = html2md(page)
    return clean_md(md, rm_comments=rm_comments, rm_details=rm_details)
47
+
48
+ # %% ../03_download.ipynb 10
49
def get_llmstxt(url, # URL to fetch; must end in 'llms.txt'
                optional=False, # Passed to `create_ctx`
                n_workers=None # Passed to `create_ctx`
               ): # Expanded context, or `None` if `url` is not an llms.txt URL or the status is not 200
    "Get llms.txt file from `url` and expand it with `llms_txt.create_ctx()`"
    if not url.endswith('llms.txt'): return None
    resp = get(url)
    # Any status other than 200 is treated as "not available"
    if resp.status_code!=200: return None
    return create_ctx(resp.text, optional=optional, n_workers=n_workers)
55
+
56
+ # %% ../03_download.ipynb 12
57
def split_url(url):
    "Split `url` into base, path, and file name, normalising path to '/' if both path and file name are empty"
    # Strip surrounding slashes so a trailing '/' does not produce an empty file name
    parsed = urlparse(url.strip('/'))
    base = f"{parsed.scheme}://{parsed.netloc}"
    # Split at the last '/': what precedes it is the path, what follows is the file name
    path,spl,fname = parsed.path.rpartition('/')
    # Keep the separator on the file name, so `base+path+fname` gives scheme, host and path again
    fname = spl+fname
    if not path and not fname: path='/'
    return base,path,fname
65
+
66
+ # %% ../03_download.ipynb 14
67
def _tryget(url):
    "Return `url` if requesting it gives a status other than 404, otherwise `None`"
    res = get(url)
    # Only a 404 counts as missing; any other status returns the URL itself (not the response)
    return None if res.status_code==404 else url
71
+
72
+ # %% ../03_download.ipynb 15
73
def find_docs(url):
    "Return the location of an LLM-friendly llms.txt or markdown file for `url`, or `None` if none is found"
    base,path,fname = split_url(url)
    url = (base+path+fname).strip('/')
    # An explicit llms.txt URL is returned as-is, without a request
    if fname=='/llms.txt': return url
    # Already a text-like document: just probe it
    if Path(fname).suffix in('.md', '.txt', '.rst'): return _tryget(url)
    # Some other file name with a dot: probe the same URL with '.md' appended
    if '.' in fname: return _tryget(url+'.md')
    # No file name with a dot: probe each known location in order, returning the first that is not a 404
    for candidate in ('/llms.txt', '/index.md', '/index.html.md', '/index-commonmark.md'):
        found = _tryget(url+candidate)
        if found: return found
    return None
89
+
90
+ # %% ../03_download.ipynb 19
91
def read_docs(url, # URL to look for docs at
              optional=False, # Passed to `get_llmstxt`
              n_workers=None, # Passed to `get_llmstxt`
              rm_comments=True, # Remove `<!-- ... -->` comments?
              rm_details=True # Remove `<details>` sections?
             ): # Cleaned text, or `None` if nothing suitable was found
    "If available, return LLM-friendly llms.txt context or markdown file response for `url`"
    url = find_docs(url)
    if not url: return
    if url.endswith('/llms.txt'): res = get_llmstxt(url, optional=optional, n_workers=n_workers)
    else: res = get(url).text
    # `get_llmstxt` returns `None` on a non-200 status; `clean_md` would raise on a non-string
    if res is None: return
    return clean_md(res, rm_comments=rm_comments, rm_details=rm_details)
toolslm/funccall.py CHANGED
@@ -16,6 +16,7 @@ def _types(t:type)->tuple[str,Optional[str]]:
16
16
  "Tuple of json schema type name and (if appropriate) array item name."
17
17
  if t is empty: raise TypeError('Missing type')
18
18
  tmap = {int:"integer", float:"number", str:"string", bool:"boolean", list:"array", dict:"object"}
19
+ tmap.update({k.__name__: v for k, v in tmap.items()})
19
20
  if getattr(t, '__origin__', None) in (list,tuple): return "array", tmap.get(t.__args__[0], "object")
20
21
  else: return tmap[t], None
21
22
 
@@ -45,11 +46,11 @@ def get_schema(f:callable, pname='input_schema')->dict:
45
46
  if ret.docment: desc += f'\n- description: {ret.docment}'
46
47
  return {'name':f.__name__, 'description':desc, pname:paramd}
47
48
 
48
- # %% ../01_funccall.ipynb 22
49
+ # %% ../01_funccall.ipynb 24
49
50
  import ast, time, signal, traceback
50
51
  from fastcore.utils import *
51
52
 
52
- # %% ../01_funccall.ipynb 23
53
+ # %% ../01_funccall.ipynb 25
53
54
  def _copy_loc(new, orig):
54
55
  "Copy location information from original node to new node and all children."
55
56
  new = ast.copy_location(new, orig)
@@ -58,7 +59,7 @@ def _copy_loc(new, orig):
58
59
  elif isinstance(o, list): setattr(new, field, [_copy_loc(value, orig) for value in o])
59
60
  return new
60
61
 
61
- # %% ../01_funccall.ipynb 25
62
+ # %% ../01_funccall.ipynb 27
62
63
  def _run(code:str ):
63
64
  "Run `code`, returning final expression (similar to IPython)"
64
65
  tree = ast.parse(code)
@@ -81,7 +82,7 @@ def _run(code:str ):
81
82
  if _result is not None: return _result
82
83
  return stdout_buffer.getvalue().strip()
83
84
 
84
- # %% ../01_funccall.ipynb 30
85
+ # %% ../01_funccall.ipynb 32
85
86
  def python(code, # Code to execute
86
87
  timeout=5 # Maximum run time in seconds before a `TimeoutError` is raised
87
88
  ): # Result of last node, if it's an expression, or `None` otherwise
toolslm/xml.py CHANGED
@@ -37,6 +37,7 @@ doctype = namedtuple('doctype', ['source', 'content'])
37
37
  # %% ../00_xml.ipynb 11
38
38
  def _add_nls(s):
39
39
  "Add newlines to start and end of `s` if missing"
40
+ if not s: return s
40
41
  if s[ 0]!='\n': s = '\n'+s
41
42
  if s[-1]!='\n': s = s+'\n'
42
43
  return s
@@ -49,40 +50,32 @@ def mk_doctype(content:str, # The document content
49
50
  if source is None: source = hashlib.md5(content.encode()).hexdigest()[:8]
50
51
  return doctype(_add_nls(str(source).strip()), _add_nls(content.strip()))
51
52
 
52
- # %% ../00_xml.ipynb 17
53
+ # %% ../00_xml.ipynb 16
53
54
  def mk_doc(index:int, # The document index
54
55
  content:str, # The document content
55
- source:Optional[str]=None # URL, filename, etc; defaults to `md5(content)` if not provided
56
- ) -> tuple:
57
- "Create an `ft` format tuple for a single doc in Anthropic's recommended format"
58
- dt = mk_doctype(content, source)
59
- content = ft('document_content', dt.content)
60
- source = ft('source', dt.source)
61
- return ft('document', source, content, index=index)
62
-
63
- # %% ../00_xml.ipynb 18
64
- def mk_doc(index:int, # The document index
65
- content:str, # The document content
66
- source:Optional[str]=None # URL, filename, etc; defaults to `md5(content)` if not provided
56
+ source:Optional[str]=None, # URL, filename, etc; defaults to `md5(content)` if not provided
57
+ **kwargs
67
58
  ) -> tuple:
68
59
  "Create an `ft` format tuple for a single doc in Anthropic's recommended format"
69
60
  dt = mk_doctype(content, source)
70
61
  content = Document_content(dt.content)
71
62
  source = Source(dt.source)
72
- return Document(source, content, index=index)
63
+ return Document(source, content, index=index, **kwargs)
73
64
 
74
- # %% ../00_xml.ipynb 22
65
+ # %% ../00_xml.ipynb 19
75
66
  def docs_xml(docs:list[str], # The content of each document
76
67
  sources:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided
77
- prefix:bool=True # Include Anthropic's suggested prose intro?
68
+ prefix:bool=True, # Include Anthropic's suggested prose intro?
69
+ details:Optional[list]=None # Optional list of dicts with additional attrs for each doc
78
70
  )->str:
79
71
  "Create an XML string containing `docs` in Anthropic's recommended format"
80
72
  pre = 'Here are some documents for you to reference for your task:\n\n' if prefix else ''
81
73
  if sources is None: sources = [None]*len(docs)
82
- docs = (mk_doc(i+1, *o) for i,o in enumerate(zip(docs,sources)))
74
+ if details is None: details = [{}]*len(docs)
75
+ docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,sources,details)))
83
76
  return pre + to_xml(Documents(docs))
84
77
 
85
- # %% ../00_xml.ipynb 29
78
+ # %% ../00_xml.ipynb 26
86
79
  def files2ctx(
87
80
  fnames:list[Union[str,Path]], # List of file names to add to context
88
81
  prefix:bool=True # Include Anthropic's suggested prose intro?
@@ -91,7 +84,7 @@ def files2ctx(
91
84
  contents = [o.read_text() for o in fnames]
92
85
  return docs_xml(contents, fnames, prefix=prefix)
93
86
 
94
- # %% ../00_xml.ipynb 32
87
+ # %% ../00_xml.ipynb 29
95
88
  @delegates(globtastic)
96
89
  def folder2ctx(
97
90
  folder:Union[str,Path], # Folder name containing files to add to context
@@ -101,7 +94,7 @@ def folder2ctx(
101
94
  fnames = globtastic(folder, **kwargs)
102
95
  return files2ctx(fnames, prefix=prefix)
103
96
 
104
- # %% ../00_xml.ipynb 34
97
+ # %% ../00_xml.ipynb 31
105
98
  @call_parse
106
99
  @delegates(folder2ctx)
107
100
  def folder2ctx_cli(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: toolslm
3
- Version: 0.0.4
3
+ Version: 0.0.6
4
4
  Summary: Tools to make language models a bit easier to use
5
5
  Home-page: https://github.com/AnswerDotAI/toolslm
6
6
  Author: Jeremy Howard
@@ -17,6 +17,10 @@ Requires-Python: >=3.9
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
19
  Requires-Dist: fastcore >=1.5.47
20
+ Requires-Dist: beautifulsoup4
21
+ Requires-Dist: html2text
22
+ Requires-Dist: httpx
23
+ Requires-Dist: llms-txt
20
24
  Provides-Extra: dev
21
25
 
22
26
  # toolslm
@@ -0,0 +1,12 @@
1
+ toolslm/__init__.py,sha256=QiiYsv0kcJaB8wCWyT-FnI2b6be87HA-CrrIUn8LQhg,22
2
+ toolslm/_modidx.py,sha256=FiHwMAAjvPdu7kN0pA1OJTJbUg0ddo0o12_C9JUFPDc,3103
3
+ toolslm/download.py,sha256=BIhmbDSxM__57tukac63iwPx5sXIfbjYp7gh_fhp4Gw,3621
4
+ toolslm/funccall.py,sha256=ZXfzhP0N5cex7n8QHuxDfUb0BJX1iI1inFb064LAGlc,3914
5
+ toolslm/shell.py,sha256=GVqfL74NHw66zzZ7jvGVLjE55ZNJGBPvEb8kLz4aoYc,1576
6
+ toolslm/xml.py,sha256=Alcd96KfNO8LklVefyc51LbXBoVLRSgifrpMVZPqYsc,4120
7
+ toolslm-0.0.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
+ toolslm-0.0.6.dist-info/METADATA,sha256=_J7uXsh_qRX_wVKxssQjFpRH7zZjAwC0av1J5UIfTdk,3882
9
+ toolslm-0.0.6.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
10
+ toolslm-0.0.6.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
11
+ toolslm-0.0.6.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
12
+ toolslm-0.0.6.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- toolslm/__init__.py,sha256=1mptEzQihbdyqqzMgdns_j5ZGK9gz7hR2bsgA_TnjO4,22
2
- toolslm/_modidx.py,sha256=6T36Q2cYKH0lp9Tt9Us8xpZV-Z0FYqrtZGu2ZykHDkg,2068
3
- toolslm/funccall.py,sha256=mzWNLdZY6cYk-I3O5noRiEB089mPwJhnRQFsS5_JYDs,3856
4
- toolslm/shell.py,sha256=GVqfL74NHw66zzZ7jvGVLjE55ZNJGBPvEb8kLz4aoYc,1576
5
- toolslm/xml.py,sha256=dSJOHqSWnZlMK1Qf3396ISSaBHf5miNlLSYCixYB9ng,4398
6
- toolslm-0.0.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
7
- toolslm-0.0.4.dist-info/METADATA,sha256=9Ni6CdLgvxTCx7LqIrnNyUgrfu0t2Wsdabp9jZlFNvw,3782
8
- toolslm-0.0.4.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
9
- toolslm-0.0.4.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
10
- toolslm-0.0.4.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
11
- toolslm-0.0.4.dist-info/RECORD,,