symbex 1.3.1__tar.gz → 1.4.1__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {symbex-1.3.1/symbex.egg-info → symbex-1.4.1}/PKG-INFO +77 -6
- symbex-1.3.1/PKG-INFO → symbex-1.4.1/README.md +57 -17
- {symbex-1.3.1 → symbex-1.4.1}/setup.py +1 -1
- {symbex-1.3.1 → symbex-1.4.1}/symbex/cli.py +149 -52
- symbex-1.3.1/README.md → symbex-1.4.1/symbex.egg-info/PKG-INFO +88 -2
- {symbex-1.3.1 → symbex-1.4.1}/symbex.egg-info/SOURCES.txt +1 -0
- symbex-1.4.1/tests/test_output.py +46 -0
- {symbex-1.3.1 → symbex-1.4.1}/tests/test_symbex.py +5 -3
- {symbex-1.3.1 → symbex-1.4.1}/LICENSE +0 -0
- {symbex-1.3.1 → symbex-1.4.1}/setup.cfg +0 -0
- {symbex-1.3.1 → symbex-1.4.1}/symbex/__init__.py +0 -0
- {symbex-1.3.1 → symbex-1.4.1}/symbex/__main__.py +0 -0
- {symbex-1.3.1 → symbex-1.4.1}/symbex/lib.py +0 -0
- {symbex-1.3.1 → symbex-1.4.1}/symbex.egg-info/dependency_links.txt +0 -0
- {symbex-1.3.1 → symbex-1.4.1}/symbex.egg-info/entry_points.txt +0 -0
- {symbex-1.3.1 → symbex-1.4.1}/symbex.egg-info/requires.txt +0 -0
- {symbex-1.3.1 → symbex-1.4.1}/symbex.egg-info/top_level.txt +0 -0
- {symbex-1.3.1 → symbex-1.4.1}/tests/test_filters.py +0 -0
- {symbex-1.3.1 → symbex-1.4.1}/tests/test_imports.py +0 -0
- {symbex-1.3.1 → symbex-1.4.1}/tests/test_replace.py +0 -0
- {symbex-1.3.1 → symbex-1.4.1}/tests/test_symbols.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.1
|
1
|
+
Metadata-Version: 2.2
|
2
2
|
Name: symbex
|
3
|
-
Version: 1.3.1
|
3
|
+
Version: 1.4.1
|
4
4
|
Summary: Find the Python code for specified symbols
|
5
5
|
Home-page: https://github.com/simonw/symbex
|
6
6
|
Author: Simon Willison
|
@@ -10,10 +10,26 @@ Project-URL: CI, https://github.com/simonw/symbex/actions
|
|
10
10
|
Project-URL: Changelog, https://github.com/simonw/symbex/releases
|
11
11
|
Requires-Python: >=3.8
|
12
12
|
Description-Content-Type: text/markdown
|
13
|
-
Provides-Extra: test
|
14
13
|
License-File: LICENSE
|
15
|
-
|
16
|
-
|
14
|
+
Requires-Dist: click
|
15
|
+
Provides-Extra: test
|
16
|
+
Requires-Dist: pytest; extra == "test"
|
17
|
+
Requires-Dist: pytest-icdiff; extra == "test"
|
18
|
+
Requires-Dist: cogapp; extra == "test"
|
19
|
+
Requires-Dist: PyYAML; extra == "test"
|
20
|
+
Requires-Dist: ruff; extra == "test"
|
21
|
+
Dynamic: author
|
22
|
+
Dynamic: description
|
23
|
+
Dynamic: description-content-type
|
24
|
+
Dynamic: home-page
|
25
|
+
Dynamic: license
|
26
|
+
Dynamic: project-url
|
27
|
+
Dynamic: provides-extra
|
28
|
+
Dynamic: requires-dist
|
29
|
+
Dynamic: requires-python
|
30
|
+
Dynamic: summary
|
31
|
+
|
32
|
+
# Symbex
|
17
33
|
|
18
34
|
[](https://pypi.org/project/symbex/)
|
19
35
|
[](https://github.com/simonw/symbex/releases)
|
@@ -22,7 +38,7 @@ License-File: LICENSE
|
|
22
38
|
|
23
39
|
Find the Python code for specified symbols
|
24
40
|
|
25
|
-
Read [
|
41
|
+
Read [Symbex: search Python code for functions and classes, then pipe them into a LLM](https://simonwillison.net/2023/Jun/18/symbex/) for background on this project.
|
26
42
|
|
27
43
|
## Installation
|
28
44
|
|
@@ -307,6 +323,42 @@ Or to count every async test function:
|
|
307
323
|
```bash
|
308
324
|
symbex --async 'test_*' --count
|
309
325
|
```
|
326
|
+
## Structured output
|
327
|
+
|
328
|
+
Symbex defaults to outputting plain text (actually valid Python code, thanks to the way it uses comments).
|
329
|
+
|
330
|
+
You can request output in CSV, TSV, JSON or newline-delimited JSON instead, using the following options:
|
331
|
+
|
332
|
+
- `--json`: a JSON array, `[{"id": "...", "code": "..."}]`
|
333
|
+
- `--nl`: newline-delimited JSON, `{"id": "...", "code": "..."}` per line
|
334
|
+
- `--csv`: CSV with `id,code` as the heading row
|
335
|
+
- `--tsv`: TSV with `id\tcode` as the heading row
|
336
|
+
|
337
|
+
In each case the ID will be the path to the file containing the symbol, followed by a colon, followed by the line number of the symbol, for example:
|
338
|
+
|
339
|
+
```json
|
340
|
+
{
|
341
|
+
"id": "symbex/lib.py:82",
|
342
|
+
"code": "def match(name: str, symbols: Iterable[str]) -> bool:"
|
343
|
+
}
|
344
|
+
```
|
345
|
+
If you pass `-i/--imports` the ID will be the import line instead:
|
346
|
+
```json
|
347
|
+
{
|
348
|
+
"id": "from symbex.lib import match",
|
349
|
+
"code": "def match(name: str, symbols: Iterable[str]) -> bool:"
|
350
|
+
}
|
351
|
+
```
|
352
|
+
Pass `--id-prefix 'something:'` to add the specified prefix to the start of each ID.
|
353
|
+
|
354
|
+
This example will generate a CSV file of all of your test functions, using the import style of IDs and a prefix of `test:`:
|
355
|
+
|
356
|
+
```bash
|
357
|
+
symbex 'test_*' \
|
358
|
+
--function \
|
359
|
+
--imports \
|
360
|
+
--id-prefix 'test:' --csv > tests.csv
|
361
|
+
```
|
310
362
|
|
311
363
|
## Using with LLM
|
312
364
|
|
@@ -323,6 +375,20 @@ And got back this:
|
|
323
375
|
|
324
376
|
> This code defines a custom `Response` class with methods for returning HTTP responses. It includes methods for setting cookies, returning HTML, text, and JSON responses, and redirecting to a different URL. The `asgi_send` method sends the response to the client using the ASGI (Asynchronous Server Gateway Interface) protocol.
|
325
377
|
|
378
|
+
The structured output feature is designed to be used with [LLM embeddings](https://llm.datasette.io/en/stable/embeddings/index.html). You can generate embeddings for every symbol in your codebase using [llm embed-multi](https://llm.datasette.io/en/stable/embeddings/cli.html#llm-embed-multi) like this:
|
379
|
+
|
380
|
+
```bash
|
381
|
+
symbex '*' '*:*' --nl | \
|
382
|
+
llm embed-multi symbols - \
|
383
|
+
--format nl --database embeddings.db --store
|
384
|
+
```
|
385
|
+
This creates a database in `embeddings.db` containing all of your symbols along with embedding vectors.
|
386
|
+
|
387
|
+
You can then search your code like this:
|
388
|
+
```bash
|
389
|
+
llm similar symbols -d embeddings.db -c 'test csv' | jq
|
390
|
+
```
|
391
|
+
|
326
392
|
## Replacing a matched symbol
|
327
393
|
|
328
394
|
The `--replace` option can be used to replace a single matched symbol with content piped in to standard input.
|
@@ -519,6 +585,11 @@ Options:
|
|
519
585
|
--check Exit with non-zero code if any matches found
|
520
586
|
--replace Replace matching symbol with text from stdin
|
521
587
|
--rexec TEXT Replace with the result of piping to this tool
|
588
|
+
--csv Output as CSV
|
589
|
+
--tsv Output as TSV
|
590
|
+
--json Output as JSON
|
591
|
+
--nl Output as newline-delimited JSON
|
592
|
+
--id-prefix TEXT Prefix to use for symbol IDs
|
522
593
|
--help Show this message and exit.
|
523
594
|
|
524
595
|
```
|
@@ -1,19 +1,4 @@
|
|
1
|
-
|
2
|
-
Name: symbex
|
3
|
-
Version: 1.3.1
|
4
|
-
Summary: Find the Python code for specified symbols
|
5
|
-
Home-page: https://github.com/simonw/symbex
|
6
|
-
Author: Simon Willison
|
7
|
-
License: Apache License, Version 2.0
|
8
|
-
Project-URL: Issues, https://github.com/simonw/symbex/issues
|
9
|
-
Project-URL: CI, https://github.com/simonw/symbex/actions
|
10
|
-
Project-URL: Changelog, https://github.com/simonw/symbex/releases
|
11
|
-
Requires-Python: >=3.8
|
12
|
-
Description-Content-Type: text/markdown
|
13
|
-
Provides-Extra: test
|
14
|
-
License-File: LICENSE
|
15
|
-
|
16
|
-
# symbex
|
1
|
+
# Symbex
|
17
2
|
|
18
3
|
[](https://pypi.org/project/symbex/)
|
19
4
|
[](https://github.com/simonw/symbex/releases)
|
@@ -22,7 +7,7 @@ License-File: LICENSE
|
|
22
7
|
|
23
8
|
Find the Python code for specified symbols
|
24
9
|
|
25
|
-
Read [
|
10
|
+
Read [Symbex: search Python code for functions and classes, then pipe them into a LLM](https://simonwillison.net/2023/Jun/18/symbex/) for background on this project.
|
26
11
|
|
27
12
|
## Installation
|
28
13
|
|
@@ -307,6 +292,42 @@ Or to count every async test function:
|
|
307
292
|
```bash
|
308
293
|
symbex --async 'test_*' --count
|
309
294
|
```
|
295
|
+
## Structured output
|
296
|
+
|
297
|
+
Symbex defaults to outputting plain text (actually valid Python code, thanks to the way it uses comments).
|
298
|
+
|
299
|
+
You can request output in CSV, TSV, JSON or newline-delimited JSON instead, using the following options:
|
300
|
+
|
301
|
+
- `--json`: a JSON array, `[{"id": "...", "code": "..."}]`
|
302
|
+
- `--nl`: newline-delimited JSON, `{"id": "...", "code": "..."}` per line
|
303
|
+
- `--csv`: CSV with `id,code` as the heading row
|
304
|
+
- `--tsv`: TSV with `id\tcode` as the heading row
|
305
|
+
|
306
|
+
In each case the ID will be the path to the file containing the symbol, followed by a colon, followed by the line number of the symbol, for example:
|
307
|
+
|
308
|
+
```json
|
309
|
+
{
|
310
|
+
"id": "symbex/lib.py:82",
|
311
|
+
"code": "def match(name: str, symbols: Iterable[str]) -> bool:"
|
312
|
+
}
|
313
|
+
```
|
314
|
+
If you pass `-i/--imports` the ID will be the import line instead:
|
315
|
+
```json
|
316
|
+
{
|
317
|
+
"id": "from symbex.lib import match",
|
318
|
+
"code": "def match(name: str, symbols: Iterable[str]) -> bool:"
|
319
|
+
}
|
320
|
+
```
|
321
|
+
Pass `--id-prefix 'something:'` to add the specified prefix to the start of each ID.
|
322
|
+
|
323
|
+
This example will generate a CSV file of all of your test functions, using the import style of IDs and a prefix of `test:`:
|
324
|
+
|
325
|
+
```bash
|
326
|
+
symbex 'test_*' \
|
327
|
+
--function \
|
328
|
+
--imports \
|
329
|
+
--id-prefix 'test:' --csv > tests.csv
|
330
|
+
```
|
310
331
|
|
311
332
|
## Using with LLM
|
312
333
|
|
@@ -323,6 +344,20 @@ And got back this:
|
|
323
344
|
|
324
345
|
> This code defines a custom `Response` class with methods for returning HTTP responses. It includes methods for setting cookies, returning HTML, text, and JSON responses, and redirecting to a different URL. The `asgi_send` method sends the response to the client using the ASGI (Asynchronous Server Gateway Interface) protocol.
|
325
346
|
|
347
|
+
The structured output feature is designed to be used with [LLM embeddings](https://llm.datasette.io/en/stable/embeddings/index.html). You can generate embeddings for every symbol in your codebase using [llm embed-multi](https://llm.datasette.io/en/stable/embeddings/cli.html#llm-embed-multi) like this:
|
348
|
+
|
349
|
+
```bash
|
350
|
+
symbex '*' '*:*' --nl | \
|
351
|
+
llm embed-multi symbols - \
|
352
|
+
--format nl --database embeddings.db --store
|
353
|
+
```
|
354
|
+
This creates a database in `embeddings.db` containing all of your symbols along with embedding vectors.
|
355
|
+
|
356
|
+
You can then search your code like this:
|
357
|
+
```bash
|
358
|
+
llm similar symbols -d embeddings.db -c 'test csv' | jq
|
359
|
+
```
|
360
|
+
|
326
361
|
## Replacing a matched symbol
|
327
362
|
|
328
363
|
The `--replace` option can be used to replace a single matched symbol with content piped in to standard input.
|
@@ -519,6 +554,11 @@ Options:
|
|
519
554
|
--check Exit with non-zero code if any matches found
|
520
555
|
--replace Replace matching symbol with text from stdin
|
521
556
|
--rexec TEXT Replace with the result of piping to this tool
|
557
|
+
--csv Output as CSV
|
558
|
+
--tsv Output as TSV
|
559
|
+
--json Output as JSON
|
560
|
+
--nl Output as newline-delimited JSON
|
561
|
+
--id-prefix TEXT Prefix to use for symbol IDs
|
522
562
|
--help Show this message and exit.
|
523
563
|
|
524
564
|
```
|
@@ -1,11 +1,15 @@
|
|
1
1
|
import ast
|
2
2
|
import click
|
3
|
+
import csv
|
4
|
+
import dataclasses
|
3
5
|
import importlib
|
4
6
|
import inspect
|
7
|
+
import json
|
5
8
|
import pathlib
|
6
9
|
import site
|
7
10
|
import subprocess
|
8
11
|
import sys
|
12
|
+
from typing import TextIO, Iterable, Literal, Tuple
|
9
13
|
|
10
14
|
from .lib import (
|
11
15
|
code_for_node,
|
@@ -16,6 +20,14 @@ from .lib import (
|
|
16
20
|
)
|
17
21
|
|
18
22
|
|
23
|
+
@dataclasses.dataclass
|
24
|
+
class Output:
|
25
|
+
symbol_id: str
|
26
|
+
output_identifier_line: str
|
27
|
+
output_import_line: str
|
28
|
+
snippet: str
|
29
|
+
|
30
|
+
|
19
31
|
@click.command()
|
20
32
|
@click.version_option()
|
21
33
|
@click.argument("symbols", nargs=-1)
|
@@ -169,6 +181,12 @@ from .lib import (
|
|
169
181
|
help="Replace matching symbol with text from stdin",
|
170
182
|
)
|
171
183
|
@click.option("--rexec", help="Replace with the result of piping to this tool")
|
184
|
+
# Output options
|
185
|
+
@click.option("csv_", "--csv", is_flag=True, help="Output as CSV")
|
186
|
+
@click.option("--tsv", is_flag=True, help="Output as TSV")
|
187
|
+
@click.option("json_", "--json", is_flag=True, help="Output as JSON")
|
188
|
+
@click.option("--nl", is_flag=True, help="Output as newline-delimited JSON")
|
189
|
+
@click.option("--id-prefix", help="Prefix to use for symbol IDs")
|
172
190
|
def cli(
|
173
191
|
symbols,
|
174
192
|
files,
|
@@ -200,6 +218,11 @@ def cli(
|
|
200
218
|
check,
|
201
219
|
replace,
|
202
220
|
rexec,
|
221
|
+
csv_,
|
222
|
+
tsv,
|
223
|
+
json_,
|
224
|
+
nl,
|
225
|
+
id_prefix,
|
203
226
|
):
|
204
227
|
"""
|
205
228
|
Find symbols in Python code and print the code for them.
|
@@ -258,6 +281,17 @@ def cli(
|
|
258
281
|
symbex first_function --rexec "sed 's/^/# /'"
|
259
282
|
# This uses sed to comment out the function body
|
260
283
|
"""
|
284
|
+
# Only one of --json, --csv, --tsv, --nl
|
285
|
+
output_formats = [csv_, tsv, json_, nl]
|
286
|
+
if sum(output_formats) > 1:
|
287
|
+
raise click.ClickException("Only one of --csv, --tsv, --json, --nl can be used")
|
288
|
+
if id_prefix and not sum(output_formats):
|
289
|
+
raise click.ClickException(
|
290
|
+
"--id-prefix can only be used with --csv, --tsv, --json or --nl"
|
291
|
+
)
|
292
|
+
if id_prefix is None:
|
293
|
+
id_prefix = ""
|
294
|
+
|
261
295
|
if modules:
|
262
296
|
module_dirs = []
|
263
297
|
module_files = []
|
@@ -362,7 +396,7 @@ def cli(
|
|
362
396
|
for directory in directories:
|
363
397
|
for path in pathlib.Path(directory).rglob("*.py"):
|
364
398
|
# Skip if path is inside any of 'excludes'
|
365
|
-
if any(
|
399
|
+
if any(path.resolve().is_relative_to(exclude) for exclude in excludes):
|
366
400
|
continue
|
367
401
|
if path.is_file():
|
368
402
|
yield path
|
@@ -436,54 +470,90 @@ def cli(
|
|
436
470
|
pwd = pathlib.Path(".").resolve()
|
437
471
|
num_matches = 0
|
438
472
|
replace_matches = []
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
click.secho(f"# Syntax error in {file}: {ex}", err=True, fg="yellow")
|
451
|
-
continue
|
452
|
-
for node, class_name in nodes:
|
453
|
-
if not filter(node):
|
473
|
+
|
474
|
+
def stuff_to_output():
|
475
|
+
nonlocal num_matches
|
476
|
+
for file in iterate_files():
|
477
|
+
try:
|
478
|
+
code = read_file(file)
|
479
|
+
except UnicodeDecodeError as ex:
|
480
|
+
if not silent:
|
481
|
+
click.secho(
|
482
|
+
f"# Unicode error in {file}: {ex}", err=True, fg="yellow"
|
483
|
+
)
|
454
484
|
continue
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
else:
|
463
|
-
# else print absolute path
|
464
|
-
path = file.resolve()
|
465
|
-
snippet, line_no = code_for_node(code, node, class_name, signatures, docs)
|
466
|
-
if replace:
|
467
|
-
replace_matches.append((file.resolve(), snippet, line_no))
|
485
|
+
try:
|
486
|
+
nodes = find_symbol_nodes(code, str(file), symbols)
|
487
|
+
except SyntaxError as ex:
|
488
|
+
if not silent:
|
489
|
+
click.secho(
|
490
|
+
f"# Syntax error in {file}: {ex}", err=True, fg="yellow"
|
491
|
+
)
|
468
492
|
continue
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
493
|
+
for node, class_name in nodes:
|
494
|
+
if not filter(node):
|
495
|
+
continue
|
496
|
+
if count or check:
|
497
|
+
num_matches += 1
|
498
|
+
if count or not signatures:
|
499
|
+
continue
|
500
|
+
# If file is within pwd, print relative path
|
501
|
+
if pwd in file.resolve().parents:
|
502
|
+
path = file.resolve().relative_to(pwd)
|
503
|
+
else:
|
504
|
+
# else print absolute path
|
505
|
+
path = file.resolve()
|
506
|
+
snippet, line_no = code_for_node(
|
507
|
+
code, node, class_name, signatures, docs
|
478
508
|
)
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
509
|
+
if replace:
|
510
|
+
replace_matches.append((file.resolve(), snippet, line_no))
|
511
|
+
continue
|
512
|
+
|
513
|
+
output_identifier_line = None
|
514
|
+
output_import_line = None
|
515
|
+
symbol_id = None
|
516
|
+
|
517
|
+
if not no_file:
|
518
|
+
bits = ["# File:", path]
|
519
|
+
if class_name:
|
520
|
+
bits.extend(["Class:", class_name])
|
521
|
+
bits.extend(["Line:", line_no])
|
522
|
+
symbol_id = "{}:{}".format(path, line_no)
|
523
|
+
output_identifier_line = " ".join(str(bit) for bit in bits)
|
524
|
+
if imports:
|
525
|
+
import_line = import_line_for_function(
|
526
|
+
node.name, path, sys_paths or directories
|
483
527
|
)
|
484
|
-
|
485
|
-
|
528
|
+
# If it's a class then output '# from x import Class' instead
|
529
|
+
if class_name:
|
530
|
+
import_line = (
|
531
|
+
import_line.split(" import ")[0] + " import " + class_name
|
532
|
+
)
|
533
|
+
symbol_id = import_line
|
534
|
+
output_import_line = "# " + import_line
|
535
|
+
|
536
|
+
yield Output(
|
537
|
+
symbol_id, output_identifier_line, output_import_line, snippet
|
538
|
+
)
|
539
|
+
|
540
|
+
if sum(output_formats) == 0:
|
541
|
+
for item in stuff_to_output():
|
542
|
+
if item.output_identifier_line:
|
543
|
+
click.echo(item.output_identifier_line)
|
544
|
+
if item.output_import_line:
|
545
|
+
click.echo(item.output_import_line)
|
546
|
+
click.echo(item.snippet)
|
486
547
|
click.echo()
|
548
|
+
else:
|
549
|
+
# Do the fancy output formats thing
|
550
|
+
to_output(
|
551
|
+
sys.stdout,
|
552
|
+
((id_prefix + item.symbol_id, item.snippet) for item in stuff_to_output()),
|
553
|
+
format="csv" if csv_ else "tsv" if tsv else "json" if json_ else "nl",
|
554
|
+
)
|
555
|
+
return
|
556
|
+
|
487
557
|
if count:
|
488
558
|
click.echo(num_matches)
|
489
559
|
|
@@ -531,13 +601,40 @@ def cli(
|
|
531
601
|
filepath.write_text(new, "utf-8")
|
532
602
|
|
533
603
|
|
534
|
-
def is_subpath(path: pathlib.Path, parent: pathlib.Path) -> bool:
|
535
|
-
try:
|
536
|
-
path.relative_to(parent)
|
537
|
-
return True
|
538
|
-
except ValueError:
|
539
|
-
return False
|
540
|
-
|
541
|
-
|
542
604
|
def is_dunder(name):
|
543
605
|
return name.startswith("__") and name.endswith("__")
|
606
|
+
|
607
|
+
|
608
|
+
def to_output(
|
609
|
+
fp: TextIO,
|
610
|
+
lines: Iterable[Tuple[str, str]],
|
611
|
+
format: Literal["csv", "tsv", "json", "nl"] = "csv",
|
612
|
+
) -> None:
|
613
|
+
if format == "nl":
|
614
|
+
for id, content in lines:
|
615
|
+
line = json.dumps({"id": id, "code": content})
|
616
|
+
fp.write(line + "\n")
|
617
|
+
return
|
618
|
+
|
619
|
+
elif format == "json":
|
620
|
+
fp.write("[")
|
621
|
+
first = True
|
622
|
+
for id, content in lines:
|
623
|
+
line = json.dumps({"id": id, "code": content})
|
624
|
+
if first:
|
625
|
+
fp.write(line)
|
626
|
+
first = False
|
627
|
+
else:
|
628
|
+
fp.write(",\n " + line)
|
629
|
+
fp.write("]\n")
|
630
|
+
return
|
631
|
+
|
632
|
+
dialect = "excel" if format == "csv" else "excel-tab"
|
633
|
+
writer = csv.writer(fp, dialect=dialect)
|
634
|
+
|
635
|
+
# Write header
|
636
|
+
writer.writerow(["id", "code"])
|
637
|
+
|
638
|
+
# Write content
|
639
|
+
for id, content in lines:
|
640
|
+
writer.writerow([id, content])
|
@@ -1,4 +1,35 @@
|
|
1
|
-
|
1
|
+
Metadata-Version: 2.2
|
2
|
+
Name: symbex
|
3
|
+
Version: 1.4.1
|
4
|
+
Summary: Find the Python code for specified symbols
|
5
|
+
Home-page: https://github.com/simonw/symbex
|
6
|
+
Author: Simon Willison
|
7
|
+
License: Apache License, Version 2.0
|
8
|
+
Project-URL: Issues, https://github.com/simonw/symbex/issues
|
9
|
+
Project-URL: CI, https://github.com/simonw/symbex/actions
|
10
|
+
Project-URL: Changelog, https://github.com/simonw/symbex/releases
|
11
|
+
Requires-Python: >=3.8
|
12
|
+
Description-Content-Type: text/markdown
|
13
|
+
License-File: LICENSE
|
14
|
+
Requires-Dist: click
|
15
|
+
Provides-Extra: test
|
16
|
+
Requires-Dist: pytest; extra == "test"
|
17
|
+
Requires-Dist: pytest-icdiff; extra == "test"
|
18
|
+
Requires-Dist: cogapp; extra == "test"
|
19
|
+
Requires-Dist: PyYAML; extra == "test"
|
20
|
+
Requires-Dist: ruff; extra == "test"
|
21
|
+
Dynamic: author
|
22
|
+
Dynamic: description
|
23
|
+
Dynamic: description-content-type
|
24
|
+
Dynamic: home-page
|
25
|
+
Dynamic: license
|
26
|
+
Dynamic: project-url
|
27
|
+
Dynamic: provides-extra
|
28
|
+
Dynamic: requires-dist
|
29
|
+
Dynamic: requires-python
|
30
|
+
Dynamic: summary
|
31
|
+
|
32
|
+
# Symbex
|
2
33
|
|
3
34
|
[](https://pypi.org/project/symbex/)
|
4
35
|
[](https://github.com/simonw/symbex/releases)
|
@@ -7,7 +38,7 @@
|
|
7
38
|
|
8
39
|
Find the Python code for specified symbols
|
9
40
|
|
10
|
-
Read [
|
41
|
+
Read [Symbex: search Python code for functions and classes, then pipe them into a LLM](https://simonwillison.net/2023/Jun/18/symbex/) for background on this project.
|
11
42
|
|
12
43
|
## Installation
|
13
44
|
|
@@ -292,6 +323,42 @@ Or to count every async test function:
|
|
292
323
|
```bash
|
293
324
|
symbex --async 'test_*' --count
|
294
325
|
```
|
326
|
+
## Structured output
|
327
|
+
|
328
|
+
Symbex defaults to outputting plain text (actually valid Python code, thanks to the way it uses comments).
|
329
|
+
|
330
|
+
You can request output in CSV, TSV, JSON or newline-delimited JSON instead, using the following options:
|
331
|
+
|
332
|
+
- `--json`: a JSON array, `[{"id": "...", "code": "..."}]`
|
333
|
+
- `--nl`: newline-delimited JSON, `{"id": "...", "code": "..."}` per line
|
334
|
+
- `--csv`: CSV with `id,code` as the heading row
|
335
|
+
- `--tsv`: TSV with `id\tcode` as the heading row
|
336
|
+
|
337
|
+
In each case the ID will be the path to the file containing the symbol, followed by a colon, followed by the line number of the symbol, for example:
|
338
|
+
|
339
|
+
```json
|
340
|
+
{
|
341
|
+
"id": "symbex/lib.py:82",
|
342
|
+
"code": "def match(name: str, symbols: Iterable[str]) -> bool:"
|
343
|
+
}
|
344
|
+
```
|
345
|
+
If you pass `-i/--imports` the ID will be the import line instead:
|
346
|
+
```json
|
347
|
+
{
|
348
|
+
"id": "from symbex.lib import match",
|
349
|
+
"code": "def match(name: str, symbols: Iterable[str]) -> bool:"
|
350
|
+
}
|
351
|
+
```
|
352
|
+
Pass `--id-prefix 'something:'` to add the specified prefix to the start of each ID.
|
353
|
+
|
354
|
+
This example will generate a CSV file of all of your test functions, using the import style of IDs and a prefix of `test:`:
|
355
|
+
|
356
|
+
```bash
|
357
|
+
symbex 'test_*' \
|
358
|
+
--function \
|
359
|
+
--imports \
|
360
|
+
--id-prefix 'test:' --csv > tests.csv
|
361
|
+
```
|
295
362
|
|
296
363
|
## Using with LLM
|
297
364
|
|
@@ -308,6 +375,20 @@ And got back this:
|
|
308
375
|
|
309
376
|
> This code defines a custom `Response` class with methods for returning HTTP responses. It includes methods for setting cookies, returning HTML, text, and JSON responses, and redirecting to a different URL. The `asgi_send` method sends the response to the client using the ASGI (Asynchronous Server Gateway Interface) protocol.
|
310
377
|
|
378
|
+
The structured output feature is designed to be used with [LLM embeddings](https://llm.datasette.io/en/stable/embeddings/index.html). You can generate embeddings for every symbol in your codebase using [llm embed-multi](https://llm.datasette.io/en/stable/embeddings/cli.html#llm-embed-multi) like this:
|
379
|
+
|
380
|
+
```bash
|
381
|
+
symbex '*' '*:*' --nl | \
|
382
|
+
llm embed-multi symbols - \
|
383
|
+
--format nl --database embeddings.db --store
|
384
|
+
```
|
385
|
+
This creates a database in `embeddings.db` containing all of your symbols along with embedding vectors.
|
386
|
+
|
387
|
+
You can then search your code like this:
|
388
|
+
```bash
|
389
|
+
llm similar symbols -d embeddings.db -c 'test csv' | jq
|
390
|
+
```
|
391
|
+
|
311
392
|
## Replacing a matched symbol
|
312
393
|
|
313
394
|
The `--replace` option can be used to replace a single matched symbol with content piped in to standard input.
|
@@ -504,6 +585,11 @@ Options:
|
|
504
585
|
--check Exit with non-zero code if any matches found
|
505
586
|
--replace Replace matching symbol with text from stdin
|
506
587
|
--rexec TEXT Replace with the result of piping to this tool
|
588
|
+
--csv Output as CSV
|
589
|
+
--tsv Output as TSV
|
590
|
+
--json Output as JSON
|
591
|
+
--nl Output as newline-delimited JSON
|
592
|
+
--id-prefix TEXT Prefix to use for symbol IDs
|
507
593
|
--help Show this message and exit.
|
508
594
|
|
509
595
|
```
|
@@ -0,0 +1,46 @@
|
|
1
|
+
import pytest
|
2
|
+
from click.testing import CliRunner
|
3
|
+
from symbex.cli import cli
|
4
|
+
|
5
|
+
|
6
|
+
@pytest.mark.parametrize(
|
7
|
+
"extra_args,expected,expected_error",
|
8
|
+
(
|
9
|
+
(["--json"], '[{"id": "symbex.py:1", "code": "def blah():"}]\n', None),
|
10
|
+
(["--csv"], "id,code\nsymbex.py:1,def blah():\n", None),
|
11
|
+
(["--tsv"], "id\tcode\nsymbex.py:1\tdef blah():\n", None),
|
12
|
+
(["--nl"], '{"id": "symbex.py:1", "code": "def blah():"}\n', None),
|
13
|
+
# ID prefix
|
14
|
+
(
|
15
|
+
["--nl", "--id-prefix", "foo:"],
|
16
|
+
'{"id": "foo:symbex.py:1", "code": "def blah():"}\n',
|
17
|
+
None,
|
18
|
+
),
|
19
|
+
# Error states
|
20
|
+
(
|
21
|
+
["--json", "--csv"],
|
22
|
+
None,
|
23
|
+
"Only one of --csv, --tsv, --json, --nl can be used",
|
24
|
+
),
|
25
|
+
(
|
26
|
+
["--id-prefix", "foo:"],
|
27
|
+
None,
|
28
|
+
"--id-prefix can only be used with --csv, --tsv, --json or --nl",
|
29
|
+
),
|
30
|
+
),
|
31
|
+
)
|
32
|
+
def test_output(extra_args, expected, expected_error):
|
33
|
+
runner = CliRunner()
|
34
|
+
with runner.isolated_filesystem():
|
35
|
+
open("symbex.py", "w").write("def blah():\n pass\n")
|
36
|
+
result = runner.invoke(
|
37
|
+
cli,
|
38
|
+
["blah", "-s"] + extra_args,
|
39
|
+
catch_exceptions=False,
|
40
|
+
)
|
41
|
+
if expected_error:
|
42
|
+
assert result.exit_code != 0
|
43
|
+
assert expected_error in result.stdout
|
44
|
+
else:
|
45
|
+
assert result.exit_code == 0
|
46
|
+
assert result.output == expected
|
@@ -89,6 +89,8 @@ def directory_full_of_code(tmpdir):
|
|
89
89
|
["baz", "-d", "nested.py", "-x", "nested.py/x/", "--silent"],
|
90
90
|
"",
|
91
91
|
),
|
92
|
+
# -x to exclude top level directory
|
93
|
+
(["baz", "-x", "nested.py", "--silent"], ""),
|
92
94
|
# Classes
|
93
95
|
(
|
94
96
|
["MyClass", "--silent"],
|
@@ -164,10 +166,10 @@ def test_fixture(directory_full_of_code, monkeypatch, args, expected):
|
|
164
166
|
'def baz(delimiter=", ", type=str):'
|
165
167
|
),
|
166
168
|
),
|
167
|
-
#
|
169
|
+
# Tests for the --module option
|
168
170
|
(
|
169
|
-
["-m", "
|
170
|
-
("
|
171
|
+
["-m", "contextlib", "suppress", "--silent", "-sn"],
|
172
|
+
("class suppress(AbstractContextManager):"),
|
171
173
|
),
|
172
174
|
),
|
173
175
|
)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|