PyPI - toolslm - Versions diffs - 0.3.1__tar.gz → 0.3.3__tar.gz - Mend

toolslm-0.3.1.tar.gz → toolslm-0.3.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

{toolslm-0.3.1/toolslm.egg-info → toolslm-0.3.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: toolslm
-Version: 0.3.1
+Version: 0.3.3
 Summary: Tools to make language models a bit easier to use
 Home-page: https://github.com/AnswerDotAI/toolslm
 Author: Jeremy Howard

{toolslm-0.3.1 → toolslm-0.3.3}/settings.ini RENAMED Viewed

@@ -1,7 +1,7 @@
 [DEFAULT]
 repo = toolslm
 lib_name = toolslm
-version = 0.3.1
+version = 0.3.3
 min_python = 3.9
 license = apache2
 black_formatting = False

toolslm-0.3.3/toolslm/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "0.3.3"

{toolslm-0.3.1 → toolslm-0.3.3}/toolslm/_modidx.py RENAMED Viewed

@@ -35,10 +35,15 @@ d = { 'settings': { 'branch': 'main',
                                                                                     'toolslm/shell.py'),
                                'toolslm.shell.get_shell': ('shell.html#get_shell', 'toolslm/shell.py')},
             'toolslm.xml': { 'toolslm.xml._add_nls': ('xml.html#_add_nls', 'toolslm/xml.py'),
+                             'toolslm.xml.cell2out': ('xml.html#cell2out', 'toolslm/xml.py'),
+                             'toolslm.xml.cell2xml': ('xml.html#cell2xml', 'toolslm/xml.py'),
                              'toolslm.xml.docs_xml': ('xml.html#docs_xml', 'toolslm/xml.py'),
                              'toolslm.xml.files2ctx': ('xml.html#files2ctx', 'toolslm/xml.py'),
                              'toolslm.xml.folder2ctx': ('xml.html#folder2ctx', 'toolslm/xml.py'),
                              'toolslm.xml.folder2ctx_cli': ('xml.html#folder2ctx_cli', 'toolslm/xml.py'),
+                             'toolslm.xml.get_mime_text': ('xml.html#get_mime_text', 'toolslm/xml.py'),
                              'toolslm.xml.json_to_xml': ('xml.html#json_to_xml', 'toolslm/xml.py'),
                              'toolslm.xml.mk_doc': ('xml.html#mk_doc', 'toolslm/xml.py'),
-                             'toolslm.xml.mk_doctype': ('xml.html#mk_doctype', 'toolslm/xml.py')}}}
+                             'toolslm.xml.mk_doctype': ('xml.html#mk_doctype', 'toolslm/xml.py'),
+                             'toolslm.xml.nb2xml': ('xml.html#nb2xml', 'toolslm/xml.py'),
+                             'toolslm.xml.read_file': ('xml.html#read_file', 'toolslm/xml.py')}}}

{toolslm-0.3.1 → toolslm-0.3.3}/toolslm/funccall.py RENAMED Viewed

@@ -107,7 +107,9 @@ def _get_nested_schema(obj):
         if n != 'return' and n != 'self':
             _process_property(n, o, props, req, defs)
-    schema = dict(type='object', properties=props, title=obj.__name__ if isinstance(obj, type) else None)
+    tkw = {}
+    if isinstance(obj, type): tkw['title']=obj.__name__
+    schema = dict(type='object', properties=props, **tkw)
     if req: schema['required'] = list(req)
     if defs: schema['$defs'] = defs
     return schema

{toolslm-0.3.1 → toolslm-0.3.3}/toolslm/md_hier.py RENAMED Viewed

@@ -1,75 +1,63 @@
 import re
 from fastcore.utils import *
-__all__ = ['markdown_to_dict', 'create_heading_dict']
+__all__ = ['create_heading_dict', 'HeadingDict']
-def markdown_to_dict(
-    markdown_content:str  # Markdown text including headings
-)->AttrDict: # Dictionary with dot-separated hierarchical keys and content values
-    "Parse markdown content into a hierarchical dictionary with dot-separated keys."
-    def clean_heading(text): return re.sub(r'[.]+', '', text).strip()  # Only remove dots (key separator)
+class HeadingDict(dict):
+    """A dictionary-like object that also stores the markdown text content."""
+    def __init__(self, text="", *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.text = text
-    lines = markdown_content.splitlines()
+def create_heading_dict(text, rm_fenced=True):
+    "Create a nested dictionary structure from markdown headings."
+    original_text = text
+    original_lines = text.splitlines()
+    # Use fenced-removed text only for finding headings
+    text_for_headings = text
+    if rm_fenced: text_for_headings = re.sub(r'```[\s\S]*?```', '', text)
+    lines_for_headings = text_for_headings.splitlines()
     headings = []
-    in_code_block = False
     # Parse headings with their levels and line numbers
-    for idx, line in enumerate(lines):
-        # Toggle code block state when encountering fence
-        if line.strip().startswith('```'): in_code_block = not in_code_block
-        # Only detect headings when not in a code block
-        if in_code_block: continue
-        match = re.match(r'^(#{1,6})\s*(.*)', line)
+    for idx, line in enumerate(lines_for_headings):
+        match = re.match(r'^(#{1,6})\s+\S.*', line)
         if match:
             level = len(match.group(1))
-            text = match.group(2).strip()
-            headings.append({'level': level, 'text': text, 'line': idx})
+            title = line.strip('#').strip()
+            headings.append({'level': level, 'title': title, 'line': idx})
-    # Assign content to each heading, including subheadings
+    # Assign text content to each heading using original lines
     for i, h in enumerate(headings):
-        start = h['line']  # Include the heading line itself
+        start = h['line']
         # Find the end index: next heading of same or higher level
         for j in range(i + 1, len(headings)):
             if headings[j]['level'] <= h['level']:
                 end = headings[j]['line']
                 break
-        else: end = len(lines)
-        h['content'] = '\n'.join(lines[start:end]).strip()
-    # Build the dictionary with hierarchical keys
-    result,stack = {},[]
-    if not headings:
-        return dict2obj(result)
+        else:
+            end = len(original_lines)
+        h['content'] = '\n'.join(original_lines[start:end]).strip()
-    first_level = headings[0]['level']
-    for h in headings:
-        stack = stack[:h['level'] - first_level] + [clean_heading(h['text'])]
-        key = '.'.join(stack)
-        result[key] = h['content']
-    return dict2obj(result)
-def create_heading_dict(text, rm_fenced=True):
-    "Create a nested dictionary structure from markdown headings."
-    if rm_fenced: text = re.sub(r'```[\s\S]*?```', '', text)
-    headings = re.findall(r'^#+.*', text, flags=re.MULTILINE)
-    result = {}
+    # Build the nested structure
+    result = HeadingDict(original_text)
     stack = [result]
-    stack_levels = [0]  # Track the level at each stack position
-    for heading in headings:
-        level = heading.count('#')
-        title = heading.strip('#').strip()
+    stack_levels = [0]
+    for h in headings:
         # Pop stack until we find the right parent level
-        while len(stack) > 1 and stack_levels[-1] >= level:
+        while len(stack) > 1 and stack_levels[-1] >= h['level']:
             stack.pop()
             stack_levels.pop()
-        new_dict = {}
-        stack[-1][title] = new_dict
+        new_dict = HeadingDict(h['content'])
+        stack[-1][h['title']] = new_dict
         stack.append(new_dict)
-        stack_levels.append(level)
+        stack_levels.append(h['level'])
-    return dict2obj(result)
+    return result
 if __name__=='__main__':
@@ -101,63 +89,71 @@ Admin section.
 Admin users management.
 """
-    result = markdown_to_dict(md_content)
+    result = create_heading_dict(md_content)
     #for key, value in result.items(): print(f'Key: {key}\nValue:\n{value}\n{"-"*40}')
     def test_empty_content():
         md_content = "# Empty Heading"
-        result = markdown_to_dict(md_content)
-        assert result['Empty Heading'] == '# Empty Heading'
+        result = create_heading_dict(md_content)
+        assert 'Empty Heading' in result
+        assert result['Empty Heading'].text == '# Empty Heading'
+        assert result.text == md_content
     def test_special_characters():
         md_content = "# Heading *With* Special _Characters_!\nContent under heading."
-        result = markdown_to_dict(md_content)
+        result = create_heading_dict(md_content)
         assert 'Heading *With* Special _Characters_!' in result
-        assert result['Heading *With* Special _Characters_!'] == '# Heading *With* Special _Characters_!\nContent under heading.'
+        assert result['Heading *With* Special _Characters_!'].text == '# Heading *With* Special _Characters_!\nContent under heading.'
+        assert result.text == md_content
     def test_duplicate_headings():
         md_content = "# Duplicate\n## Duplicate\n### Duplicate\nContent under duplicate headings."
-        result = markdown_to_dict(md_content)
+        result = create_heading_dict(md_content)
         assert 'Duplicate' in result
-        assert 'Duplicate.Duplicate' in result
-        assert 'Duplicate.Duplicate.Duplicate' in result
-        assert result['Duplicate.Duplicate.Duplicate'] == '### Duplicate\nContent under duplicate headings.'
+        assert 'Duplicate' in result['Duplicate']
+        assert 'Duplicate' in result['Duplicate']['Duplicate']
+        assert result['Duplicate']['Duplicate']['Duplicate'].text == '### Duplicate\nContent under duplicate headings.'
+        assert result.text == md_content
     def test_no_content():
         md_content = "# No Content Heading\n## Subheading"
-        result = markdown_to_dict(md_content)
-        assert result['No Content Heading'] == '# No Content Heading\n## Subheading'
-        assert result['No Content Heading.Subheading'] == '## Subheading'
+        result = create_heading_dict(md_content)
+        assert result['No Content Heading'].text == '# No Content Heading\n## Subheading'
+        assert result['No Content Heading']['Subheading'].text == '## Subheading'
+        assert result.text == md_content
     def test_different_levels():
         md_content = "### Level 3 Heading\nContent at level 3.\n# Level 1 Heading\nContent at level 1."
-        result = markdown_to_dict(md_content)
+        result = create_heading_dict(md_content)
         assert 'Level 3 Heading' in result
         assert 'Level 1 Heading' in result
-        assert result['Level 3 Heading'] == '### Level 3 Heading\nContent at level 3.'
-        assert result['Level 1 Heading'] == '# Level 1 Heading\nContent at level 1.'
+        assert result['Level 3 Heading'].text == '### Level 3 Heading\nContent at level 3.'
+        assert result['Level 1 Heading'].text == '# Level 1 Heading\nContent at level 1.'
+        assert result.text == md_content
     def test_parent_includes_subheadings():
         md_content = "# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content."
-        result = markdown_to_dict(md_content)
-        assert result['Parent'] == '# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content.'
-        assert result['Parent.Child'] == '## Child\nChild content.\n### Grandchild\nGrandchild content.'
-        assert result['Parent.Child.Grandchild'] == '### Grandchild\nGrandchild content.'
+        result = create_heading_dict(md_content)
+        assert result['Parent'].text == '# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content.'
+        assert result['Parent']['Child'].text == '## Child\nChild content.\n### Grandchild\nGrandchild content.'
+        assert result['Parent']['Child']['Grandchild'].text == '### Grandchild\nGrandchild content.'
+        assert result.text == md_content
     def test_multiple_level2_siblings():
-        md_content = "##Sib 1\n##Sib 2\n##Sib 3\n##Sib 4\n##Sib 5'"
-        result = markdown_to_dict(md_content)
+        md_content = "## Sib 1\n## Sib 2\n## Sib 3\n## Sib 4\n## Sib 5'"
+        result = create_heading_dict(md_content)
         assert 'Sib 1' in result
         assert 'Sib 2' in result
         assert 'Sib 3' in result
         assert 'Sib 4' in result
-        assert "Sib 5'" in result  # Note the apostrophe is preserved
+        assert "Sib 5'" in result
+        assert result.text == md_content
     def test_code_chunks_escaped():
         md_content = "# Parent\nParent content.\n## Child\nChild content.\n```python\n# Code comment\nprint('Hello, world!')\n```"
-        result = markdown_to_dict(md_content)
-        assert 'Code comment' not in result
-        assert "# Code comment" in result['Parent.Child']
+        result = create_heading_dict(md_content)
+        assert 'Code comment' not in str(result)
+        assert result.text == md_content
     test_empty_content()
     test_special_characters()
@@ -180,8 +176,28 @@ Admin users management.
         result = create_heading_dict(md_content)
         assert 'Code comment' not in result
+    def test_fenced_blocks_preserved_in_text():
+        md_content = """# Section
+Content before code.
+```python
+# This heading should be ignored for structure
+def hello():
+    print("Hello, world!")
+```
+More content after code."""
+        result = create_heading_dict(md_content)
+        # Fenced code should be preserved in text content
+        assert '```python' in result['Section'].text
+        assert 'def hello():' in result['Section'].text
+        assert '```' in result['Section'].text
+        # But headings inside fenced blocks should not create structure
+        assert 'This heading should be ignored for structure' not in result['Section']
     test_nested_headings()
     test_code_chunks_escaped()
+    test_fenced_blocks_preserved_in_text()
     def test_multiple_h1s():
         md_content = "# First H1\n# Second H1\n# Third H1"
@@ -221,48 +237,38 @@ Admin users management.
     test_skip_levels_up()
     test_non_h1_start()
-    # Critical edge case tests
+    # Edge case tests
     def test_empty_input():
-        result = markdown_to_dict("")
-        assert result == {}
         result = create_heading_dict("")
         assert result == {}
+        assert result.text == ""
     def test_whitespace_only():
-        result = markdown_to_dict("   \n\t  \n   ")
-        assert result == {}
         result = create_heading_dict("   \n\t  \n   ")
         assert result == {}
+        assert result.text == "   \n\t  \n   "
     def test_malformed_headings():
-        # No space after # (actually works - regex allows it)
-        md_content = "#NoSpace\n###AlsoNoSpace\nContent"
-        result = markdown_to_dict(md_content)
-        assert 'NoSpace' in result
-        assert 'NoSpace.AlsoNoSpace' in result
-        # Too many #s (matches max 6, extra # preserved in text)
+        # Too many #s (matches max 6)
         md_content = "####### Too Many\nContent"
-        result = markdown_to_dict(md_content)
-        assert '# Too Many' in result  # Extra # now preserved in heading text
-        # Empty heading (actually creates empty key)
-        md_content = "##   \nContent after empty heading"
-        result = markdown_to_dict(md_content)
-        assert '' in result  # Empty heading creates empty key
+        result = create_heading_dict(md_content)
+        assert 'Too Many' not in result
+        assert result.text == md_content
     def test_unicode_and_emojis():
         # Unicode characters
         md_content = "# Café & Naïve\nContent with unicode\n## 中文标题\nChinese content"
-        result = markdown_to_dict(md_content)
+        result = create_heading_dict(md_content)
         assert 'Café & Naïve' in result
-        assert 'Café & Naïve.中文标题' in result
+        assert '中文标题' in result['Café & Naïve']
+        assert result.text == md_content
         # Emojis
         md_content = "# 🚀 Rocket Heading\nRocket content\n## 💻 Computer\nComputer content"
-        result = markdown_to_dict(md_content)
+        result = create_heading_dict(md_content)
         assert '🚀 Rocket Heading' in result
-        assert '🚀 Rocket Heading.💻 Computer' in result
+        assert '💻 Computer' in result['🚀 Rocket Heading']
+        assert result.text == md_content
     test_empty_input()
     test_whitespace_only()

{toolslm-0.3.1 → toolslm-0.3.3}/toolslm/xml.py RENAMED Viewed

@@ -1,7 +1,8 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../00_xml.ipynb.
 # %% auto 0
-__all__ = ['doctype', 'json_to_xml', 'mk_doctype', 'mk_doc', 'docs_xml', 'files2ctx', 'folder2ctx', 'folder2ctx_cli']
+__all__ = ['doctype', 'json_to_xml', 'get_mime_text', 'cell2out', 'cell2xml', 'nb2xml', 'mk_doctype', 'mk_doc', 'docs_xml',
+           'read_file', 'files2ctx', 'folder2ctx', 'folder2ctx_cli']
 # %% ../00_xml.ipynb
 import hashlib,xml.etree.ElementTree as ET
@@ -10,7 +11,7 @@ from collections import namedtuple
 from fastcore.utils import *
 from fastcore.meta import delegates
 from fastcore.xtras import hl_md
-from fastcore.xml import to_xml, Document, Documents, Document_content, Src
+from fastcore.xml import to_xml, Document, Documents, Document_content, Src, Source,Out,Outs,Cell
 from fastcore.script import call_parse
 try: from IPython import display
 except: display=None
@@ -31,6 +32,39 @@ def json_to_xml(d:dict, # JSON dictionary to convert
     ET.indent(root)
     return ET.tostring(root, encoding='unicode')
+# %% ../00_xml.ipynb
+def get_mime_text(data):
+    "Get text from MIME bundle, preferring markdown over plain"
+    if 'text/markdown' in data: return ''.join(list(data['text/markdown']))
+    if 'text/plain' in data: return ''.join(list(data['text/plain']))
+# %% ../00_xml.ipynb
+def cell2out(o):
+    "Convert single notebook output to XML format"
+    if hasattr(o, 'data'):
+        txt = get_mime_text(o.data)
+        if txt: return Out(txt, mime='markdown' if 'text/markdown' in o.data else 'plain')
+    if hasattr(o, 'text'):
+        txt = o.text if isinstance(o.text, str) else ''.join(o.text)
+        return Out(txt, type='stream', name=o.get('name', 'stdout'))
+    if hasattr(o, 'ename'): return Out(f"{o.ename}: {o.evalue}", type='error')
+# %% ../00_xml.ipynb
+def cell2xml(cell):
+    "Convert notebook cell to concise XML format"
+    cts = Source(''.join(cell.source)) if hasattr(cell, 'source') and cell.source else None
+    out_items = L(getattr(cell,'outputs',[])).map(cell2out).filter()
+    outs = []
+    if out_items: outs = Outs(*out_items)
+    parts = [p for p in [cts, outs] if p]
+    return Cell(*parts, type=cell.cell_type)
+# %% ../00_xml.ipynb
+def nb2xml(fname):
+    nb = dict2obj(fname.read_json())
+    cells_xml = [to_xml(cell2xml(c), do_escape=False) for c in nb.cells if c.cell_type in ('code','markdown')]
+    return '\n'.join(cells_xml)
 # %% ../00_xml.ipynb
 doctype = namedtuple('doctype', ['src', 'content'])
@@ -73,15 +107,23 @@ def docs_xml(docs:list[str],  # The content of each document
     if srcs is None: srcs = [None]*len(docs)
     if details is None: details = [{}]*len(docs)
     docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)))
-    return pre + to_xml(Documents(docs))
+    return pre + to_xml(Documents(docs), do_escape=False)
+# %% ../00_xml.ipynb
+def read_file(fname):
+    "Read file content, converting notebooks to XML if needed"
+    fname = Path(fname)
+    if fname.suffix == '.ipynb': return nb2xml(fname)
+    return fname.read_text()
 # %% ../00_xml.ipynb
 def files2ctx(
     fnames:list[Union[str,Path]], # List of file names to add to context
     prefix:bool=True # Include Anthropic's suggested prose intro?
 )->str: # XML for LM context
+    "Convert files to XML context, handling notebooks"
     fnames = [Path(o) for o in fnames]
-    contents = [o.read_text() for o in fnames]
+    contents = [read_file(o) for o in fnames]
     return docs_xml(contents, fnames, prefix=prefix)
 # %% ../00_xml.ipynb

{toolslm-0.3.1 → toolslm-0.3.3/toolslm.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: toolslm
-Version: 0.3.1
+Version: 0.3.3
 Summary: Tools to make language models a bit easier to use
 Home-page: https://github.com/AnswerDotAI/toolslm
 Author: Jeremy Howard