PyPI - symbex - Versions diffs - 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl - Mend

symbex 1.3.1py3-none-any.whl → 1.4.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

symbex/cli.py +149 -52
{symbex-1.3.1.dist-info → symbex-1.4.1.dist-info}/METADATA +75 -10
symbex-1.4.1.dist-info/RECORD +10 -0
{symbex-1.3.1.dist-info → symbex-1.4.1.dist-info}/WHEEL +1 -1
symbex-1.3.1.dist-info/RECORD +0 -10
{symbex-1.3.1.dist-info → symbex-1.4.1.dist-info}/LICENSE +0 -0
{symbex-1.3.1.dist-info → symbex-1.4.1.dist-info}/entry_points.txt +0 -0
{symbex-1.3.1.dist-info → symbex-1.4.1.dist-info}/top_level.txt +0 -0

symbex/cli.py CHANGED Viewed

@@ -1,11 +1,15 @@
 import ast
 import click
+import csv
+import dataclasses
 import importlib
 import inspect
+import json
 import pathlib
 import site
 import subprocess
 import sys
+from typing import TextIO, Iterable, Literal, Tuple
 from .lib import (
     code_for_node,
@@ -16,6 +20,14 @@ from .lib import (
 )
+@dataclasses.dataclass
+class Output:
+    symbol_id: str
+    output_identifier_line: str
+    output_import_line: str
+    snippet: str
 @click.command()
 @click.version_option()
 @click.argument("symbols", nargs=-1)
@@ -169,6 +181,12 @@ from .lib import (
     help="Replace matching symbol with text from stdin",
 )
 @click.option("--rexec", help="Replace with the result of piping to this tool")
+# Output options
+@click.option("csv_", "--csv", is_flag=True, help="Output as CSV")
+@click.option("--tsv", is_flag=True, help="Output as TSV")
+@click.option("json_", "--json", is_flag=True, help="Output as JSON")
+@click.option("--nl", is_flag=True, help="Output as newline-delimited JSON")
+@click.option("--id-prefix", help="Prefix to use for symbol IDs")
 def cli(
     symbols,
     files,
@@ -200,6 +218,11 @@ def cli(
     check,
     replace,
     rexec,
+    csv_,
+    tsv,
+    json_,
+    nl,
+    id_prefix,
 ):
     """
     Find symbols in Python code and print the code for them.
@@ -258,6 +281,17 @@ def cli(
         symbex first_function --rexec "sed 's/^/# /'"
         # This uses sed to comment out the function body
     """
+    # Only one of --json, --csv, --tsv, --nl
+    output_formats = [csv_, tsv, json_, nl]
+    if sum(output_formats) > 1:
+        raise click.ClickException("Only one of --csv, --tsv, --json, --nl can be used")
+    if id_prefix and not sum(output_formats):
+        raise click.ClickException(
+            "--id-prefix can only be used with --csv, --tsv, --json or --nl"
+        )
+    if id_prefix is None:
+        id_prefix = ""
     if modules:
         module_dirs = []
         module_files = []
@@ -362,7 +396,7 @@ def cli(
         for directory in directories:
             for path in pathlib.Path(directory).rglob("*.py"):
                 # Skip if path is inside any of 'excludes'
-                if any(is_subpath(path, exclude) for exclude in excludes):
+                if any(path.resolve().is_relative_to(exclude) for exclude in excludes):
                     continue
                 if path.is_file():
                     yield path
@@ -436,54 +470,90 @@ def cli(
     pwd = pathlib.Path(".").resolve()
     num_matches = 0
     replace_matches = []
-    for file in iterate_files():
-        try:
-            code = read_file(file)
-        except UnicodeDecodeError as ex:
-            if not silent:
-                click.secho(f"# Unicode error in {file}: {ex}", err=True, fg="yellow")
-            continue
-        try:
-            nodes = find_symbol_nodes(code, str(file), symbols)
-        except SyntaxError as ex:
-            if not silent:
-                click.secho(f"# Syntax error in {file}: {ex}", err=True, fg="yellow")
-            continue
-        for node, class_name in nodes:
-            if not filter(node):
+    def stuff_to_output():
+        nonlocal num_matches
+        for file in iterate_files():
+            try:
+                code = read_file(file)
+            except UnicodeDecodeError as ex:
+                if not silent:
+                    click.secho(
+                        f"# Unicode error in {file}: {ex}", err=True, fg="yellow"
+                    )
                 continue
-            if count or check:
-                num_matches += 1
-                if count or not signatures:
-                    continue
-            # If file is within pwd, print relative path
-            if pwd in file.resolve().parents:
-                path = file.resolve().relative_to(pwd)
-            else:
-                # else print absolute path
-                path = file.resolve()
-            snippet, line_no = code_for_node(code, node, class_name, signatures, docs)
-            if replace:
-                replace_matches.append((file.resolve(), snippet, line_no))
+            try:
+                nodes = find_symbol_nodes(code, str(file), symbols)
+            except SyntaxError as ex:
+                if not silent:
+                    click.secho(
+                        f"# Syntax error in {file}: {ex}", err=True, fg="yellow"
+                    )
                 continue
-            if not no_file:
-                bits = ["# File:", path]
-                if class_name:
-                    bits.extend(["Class:", class_name])
-                bits.extend(["Line:", line_no])
-                click.echo(" ".join(str(bit) for bit in bits))
-            if imports:
-                import_line = import_line_for_function(
-                    node.name, path, sys_paths or directories
+            for node, class_name in nodes:
+                if not filter(node):
+                    continue
+                if count or check:
+                    num_matches += 1
+                    if count or not signatures:
+                        continue
+                # If file is within pwd, print relative path
+                if pwd in file.resolve().parents:
+                    path = file.resolve().relative_to(pwd)
+                else:
+                    # else print absolute path
+                    path = file.resolve()
+                snippet, line_no = code_for_node(
+                    code, node, class_name, signatures, docs
                 )
-                # If it's a class then output '# from x import Class' instead
-                if class_name:
-                    import_line = (
-                        import_line.split(" import ")[0] + " import " + class_name
+                if replace:
+                    replace_matches.append((file.resolve(), snippet, line_no))
+                    continue
+                output_identifier_line = None
+                output_import_line = None
+                symbol_id = None
+                if not no_file:
+                    bits = ["# File:", path]
+                    if class_name:
+                        bits.extend(["Class:", class_name])
+                    bits.extend(["Line:", line_no])
+                    symbol_id = "{}:{}".format(path, line_no)
+                    output_identifier_line = " ".join(str(bit) for bit in bits)
+                if imports:
+                    import_line = import_line_for_function(
+                        node.name, path, sys_paths or directories
                     )
-                click.echo("# " + import_line)
-            click.echo(snippet)
+                    # If it's a class then output '# from x import Class' instead
+                    if class_name:
+                        import_line = (
+                            import_line.split(" import ")[0] + " import " + class_name
+                        )
+                    symbol_id = import_line
+                    output_import_line = "# " + import_line
+                yield Output(
+                    symbol_id, output_identifier_line, output_import_line, snippet
+                )
+    if sum(output_formats) == 0:
+        for item in stuff_to_output():
+            if item.output_identifier_line:
+                click.echo(item.output_identifier_line)
+            if item.output_import_line:
+                click.echo(item.output_import_line)
+            click.echo(item.snippet)
             click.echo()
+    else:
+        # Do the fancy output formats thing
+        to_output(
+            sys.stdout,
+            ((id_prefix + item.symbol_id, item.snippet) for item in stuff_to_output()),
+            format="csv" if csv_ else "tsv" if tsv else "json" if json_ else "nl",
+        )
+        return
     if count:
         click.echo(num_matches)
@@ -531,13 +601,40 @@ def cli(
         filepath.write_text(new, "utf-8")
-def is_subpath(path: pathlib.Path, parent: pathlib.Path) -> bool:
-    try:
-        path.relative_to(parent)
-        return True
-    except ValueError:
-        return False
 def is_dunder(name):
     return name.startswith("__") and name.endswith("__")
+def to_output(
+    fp: TextIO,
+    lines: Iterable[Tuple[str, str]],
+    format: Literal["csv", "tsv", "json", "nl"] = "csv",
+) -> None:
+    if format == "nl":
+        for id, content in lines:
+            line = json.dumps({"id": id, "code": content})
+            fp.write(line + "\n")
+        return
+    elif format == "json":
+        fp.write("[")
+        first = True
+        for id, content in lines:
+            line = json.dumps({"id": id, "code": content})
+            if first:
+                fp.write(line)
+                first = False
+            else:
+                fp.write(",\n " + line)
+        fp.write("]\n")
+        return
+    dialect = "excel" if format == "csv" else "excel-tab"
+    writer = csv.writer(fp, dialect=dialect)
+    # Write header
+    writer.writerow(["id", "code"])
+    # Write content
+    for id, content in lines:
+        writer.writerow([id, content])

{symbex-1.3.1.dist-info → symbex-1.4.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: symbex
-Version: 1.3.1
+Version: 1.4.1
 Summary: Find the Python code for specified symbols
 Home-page: https://github.com/simonw/symbex
 Author: Simon Willison
@@ -13,13 +13,23 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: click
 Provides-Extra: test
-Requires-Dist: pytest ; extra == 'test'
-Requires-Dist: pytest-icdiff ; extra == 'test'
-Requires-Dist: cogapp ; extra == 'test'
-Requires-Dist: PyYAML ; extra == 'test'
-Requires-Dist: ruff ; extra == 'test'
-# symbex
+Requires-Dist: pytest; extra == "test"
+Requires-Dist: pytest-icdiff; extra == "test"
+Requires-Dist: cogapp; extra == "test"
+Requires-Dist: PyYAML; extra == "test"
+Requires-Dist: ruff; extra == "test"
+Dynamic: author
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: project-url
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+# Symbex
 [![PyPI](https://img.shields.io/pypi/v/symbex.svg)](https://pypi.org/project/symbex/)
 [![Changelog](https://img.shields.io/github/v/release/simonw/symbex?include_prereleases&label=changelog)](https://github.com/simonw/symbex/releases)
@@ -28,7 +38,7 @@ Requires-Dist: ruff ; extra == 'test'
 Find the Python code for specified symbols
-Read [symbex: search Python code for functions and classes, then pipe them into a LLM](https://simonwillison.net/2023/Jun/18/symbex/) for background on this project.
+Read [Symbex: search Python code for functions and classes, then pipe them into a LLM](https://simonwillison.net/2023/Jun/18/symbex/) for background on this project.
 ## Installation
@@ -313,6 +323,42 @@ Or to count every async test function:
 ```bash
 symbex --async 'test_*' --count
 ```
+## Structured output
+Symbex defaults to outputting plain text (actually valid Python code, thanks to the way it uses comments).
+You can request output in CSV, TSV, JSON or newline-delimited JSON instead, using the following options:
+- `--json`: a JSON array, `[{"id": "...", "code": "..."}]`
+- `--nl`: newline-delimited JSON, `{"id": "...", "code": "..."}` per line
+- `--csv`: CSV with `id,code` as the heading row
+- `--tsv`: TSV with `id\tcode` as the heading row
+In each case the ID will be the path to the file containing the symbol, followed by a colon, followed by the line number of the symbol, for example:
+```json
+{
+  "id": "symbex/lib.py:82",
+  "code": "def match(name: str, symbols: Iterable[str]) -> bool:"
+}
+```
+If you pass `-i/--imports` the ID will be the import line instead:
+```json
+{
+  "id": "from symbex.lib import match",
+  "code": "def match(name: str, symbols: Iterable[str]) -> bool:"
+}
+```
+Pass `--id-prefix 'something:'` to add the specified prefix to the start of each ID.
+This example will generate a CSV file of all of your test functions, using the import style of IDs and a prefix of `test:`:
+```bash
+symbex 'test_*' \
+  --function \
+  --imports \
+  --id-prefix 'test:' \
+  --csv > tests.csv
+```
 ## Using with LLM
@@ -329,6 +375,20 @@ And got back this:
 > This code defines a custom `Response` class with methods for returning HTTP responses. It includes methods for setting cookies, returning HTML, text, and JSON responses, and redirecting to a different URL. The `asgi_send` method sends the response to the client using the ASGI (Asynchronous Server Gateway Interface) protocol.
+The structured output feature is designed to be used with [LLM embeddings](https://llm.datasette.io/en/stable/embeddings/index.html). You can generate embeddings for every symbol in your codebase using [llm embed-multi](https://llm.datasette.io/en/stable/embeddings/cli.html#llm-embed-multi) like this:
+```bash
+symbex '*' '*:*' --nl | \
+  llm embed-multi symbols - \
+  --format nl --database embeddings.db --store
+```
+This creates a database in `embeddings.db` containing all of your symbols along with embedding vectors.
+You can then search your code like this:
+```bash
+llm similar symbols -d embeddings.db -c 'test csv' | jq
+```
 ## Replacing a matched symbol
 The `--replace` option can be used to replace a single matched symbol with content piped in to standard input.
@@ -525,6 +585,11 @@ Options:
   --check                    Exit with non-zero code if any matches found
   --replace                  Replace matching symbol with text from stdin
   --rexec TEXT               Replace with the result of piping to this tool
+  --csv                      Output as CSV
+  --tsv                      Output as TSV
+  --json                     Output as JSON
+  --nl                       Output as newline-delimited JSON
+  --id-prefix TEXT           Prefix to use for symbol IDs
   --help                     Show this message and exit.
 ```

symbex-1.4.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+symbex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+symbex/__main__.py,sha256=8hDtWlaFZK24KhfNq_ZKgtXqYHsDQDetukOCMlsbW0Q,59
+symbex/cli.py,sha256=kGCltpO79yjVpXEerJlae5Sc22kOp3evy08dy0SZcU0,18096
+symbex/lib.py,sha256=CiKKOOyc6Ne_7igzNItMZpa5I6o12LEbIPrQU9al7Ro,11436
+symbex-1.4.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+symbex-1.4.1.dist-info/METADATA,sha256=vUyXJ1kEdUDfM_ye3sE7LDqjCcHfEavgEfEzED9vpbM,21564
+symbex-1.4.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+symbex-1.4.1.dist-info/entry_points.txt,sha256=YgMSEfEGqNMHM9RysFObH8lkQKVZKyymKLnXbVue_Uk,42
+symbex-1.4.1.dist-info/top_level.txt,sha256=qwle8HjAaYgpdMIHlJcTcN4gaG4wmDqUvkt54beTBTs,7
+symbex-1.4.1.dist-info/RECORD,,

{symbex-1.3.1.dist-info → symbex-1.4.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.41.2)
+Generator: setuptools (75.8.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

symbex-1.3.1.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-symbex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-symbex/__main__.py,sha256=8hDtWlaFZK24KhfNq_ZKgtXqYHsDQDetukOCMlsbW0Q,59
-symbex/cli.py,sha256=uuP28l7qAWWPE3b0u2HLM1ID89m402EBNgYOhcejdgw,14974
-symbex/lib.py,sha256=CiKKOOyc6Ne_7igzNItMZpa5I6o12LEbIPrQU9al7Ro,11436
-symbex-1.3.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-symbex-1.3.1.dist-info/METADATA,sha256=1oJBLr7_LdzFltBb5cMMCttPFo-XZhFGOzK_1MOQ9ls,19263
-symbex-1.3.1.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
-symbex-1.3.1.dist-info/entry_points.txt,sha256=YgMSEfEGqNMHM9RysFObH8lkQKVZKyymKLnXbVue_Uk,42
-symbex-1.3.1.dist-info/top_level.txt,sha256=qwle8HjAaYgpdMIHlJcTcN4gaG4wmDqUvkt54beTBTs,7
-symbex-1.3.1.dist-info/RECORD,,

{symbex-1.3.1.dist-info → symbex-1.4.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{symbex-1.3.1.dist-info → symbex-1.4.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{symbex-1.3.1.dist-info → symbex-1.4.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

symbex 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl

symbex 1.3.1py3-none-any.whl → 1.4.1py3-none-any.whl