symbex 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symbex/cli.py +149 -52
- {symbex-1.3.1.dist-info → symbex-1.4.1.dist-info}/METADATA +75 -10
- symbex-1.4.1.dist-info/RECORD +10 -0
- {symbex-1.3.1.dist-info → symbex-1.4.1.dist-info}/WHEEL +1 -1
- symbex-1.3.1.dist-info/RECORD +0 -10
- {symbex-1.3.1.dist-info → symbex-1.4.1.dist-info}/LICENSE +0 -0
- {symbex-1.3.1.dist-info → symbex-1.4.1.dist-info}/entry_points.txt +0 -0
- {symbex-1.3.1.dist-info → symbex-1.4.1.dist-info}/top_level.txt +0 -0
symbex/cli.py
CHANGED
@@ -1,11 +1,15 @@
|
|
1
1
|
import ast
|
2
2
|
import click
|
3
|
+
import csv
|
4
|
+
import dataclasses
|
3
5
|
import importlib
|
4
6
|
import inspect
|
7
|
+
import json
|
5
8
|
import pathlib
|
6
9
|
import site
|
7
10
|
import subprocess
|
8
11
|
import sys
|
12
|
+
from typing import TextIO, Iterable, Literal, Tuple
|
9
13
|
|
10
14
|
from .lib import (
|
11
15
|
code_for_node,
|
@@ -16,6 +20,14 @@ from .lib import (
|
|
16
20
|
)
|
17
21
|
|
18
22
|
|
23
|
+
@dataclasses.dataclass
class Output:
    """
    One matched symbol, as produced for the output stage of the CLI.

    Apart from ``snippet``, every field may be ``None``: the code that builds
    these objects starts each of them at ``None`` and only fills them in when
    the file/line header or the import line is being generated.

    The optional annotations are quoted so the class still imports on Python
    versions that do not support ``str | None`` at runtime.
    """

    # Identifier used by the structured (CSV/TSV/JSON/NL) formats: either
    # "path:line" or, when an import line was generated, that import line.
    symbol_id: "str | None"
    # "# File: ... Line: ..." comment printed above the snippet in plain-text
    # output, or None when no file header is wanted.
    output_identifier_line: "str | None"
    # "# from module import name" comment, or None when imports are not requested.
    output_import_line: "str | None"
    # The source code (or signature) extracted for the symbol.
    snippet: str
|
29
|
+
|
30
|
+
|
19
31
|
@click.command()
|
20
32
|
@click.version_option()
|
21
33
|
@click.argument("symbols", nargs=-1)
|
@@ -169,6 +181,12 @@ from .lib import (
|
|
169
181
|
help="Replace matching symbol with text from stdin",
|
170
182
|
)
|
171
183
|
@click.option("--rexec", help="Replace with the result of piping to this tool")
|
184
|
+
# Output options
|
185
|
+
@click.option("csv_", "--csv", is_flag=True, help="Output as CSV")
|
186
|
+
@click.option("--tsv", is_flag=True, help="Output as TSV")
|
187
|
+
@click.option("json_", "--json", is_flag=True, help="Output as JSON")
|
188
|
+
@click.option("--nl", is_flag=True, help="Output as newline-delimited JSON")
|
189
|
+
@click.option("--id-prefix", help="Prefix to use for symbol IDs")
|
172
190
|
def cli(
|
173
191
|
symbols,
|
174
192
|
files,
|
@@ -200,6 +218,11 @@ def cli(
|
|
200
218
|
check,
|
201
219
|
replace,
|
202
220
|
rexec,
|
221
|
+
csv_,
|
222
|
+
tsv,
|
223
|
+
json_,
|
224
|
+
nl,
|
225
|
+
id_prefix,
|
203
226
|
):
|
204
227
|
"""
|
205
228
|
Find symbols in Python code and print the code for them.
|
@@ -258,6 +281,17 @@ def cli(
|
|
258
281
|
symbex first_function --rexec "sed 's/^/# /'"
|
259
282
|
# This uses sed to comment out the function body
|
260
283
|
"""
|
284
|
+
# Only one of --json, --csv, --tsv, --nl
|
285
|
+
output_formats = [csv_, tsv, json_, nl]
|
286
|
+
if sum(output_formats) > 1:
|
287
|
+
raise click.ClickException("Only one of --csv, --tsv, --json, --nl can be used")
|
288
|
+
if id_prefix and not sum(output_formats):
|
289
|
+
raise click.ClickException(
|
290
|
+
"--id-prefix can only be used with --csv, --tsv, --json or --nl"
|
291
|
+
)
|
292
|
+
if id_prefix is None:
|
293
|
+
id_prefix = ""
|
294
|
+
|
261
295
|
if modules:
|
262
296
|
module_dirs = []
|
263
297
|
module_files = []
|
@@ -362,7 +396,7 @@ def cli(
|
|
362
396
|
for directory in directories:
|
363
397
|
for path in pathlib.Path(directory).rglob("*.py"):
|
364
398
|
# Skip if path is inside any of 'excludes'
|
365
|
-
if any(
|
399
|
+
if any(path.resolve().is_relative_to(exclude) for exclude in excludes):
|
366
400
|
continue
|
367
401
|
if path.is_file():
|
368
402
|
yield path
|
@@ -436,54 +470,90 @@ def cli(
|
|
436
470
|
pwd = pathlib.Path(".").resolve()
|
437
471
|
num_matches = 0
|
438
472
|
replace_matches = []
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
click.secho(f"# Syntax error in {file}: {ex}", err=True, fg="yellow")
|
451
|
-
continue
|
452
|
-
for node, class_name in nodes:
|
453
|
-
if not filter(node):
|
473
|
+
|
474
|
+
def stuff_to_output():
|
475
|
+
nonlocal num_matches
|
476
|
+
for file in iterate_files():
|
477
|
+
try:
|
478
|
+
code = read_file(file)
|
479
|
+
except UnicodeDecodeError as ex:
|
480
|
+
if not silent:
|
481
|
+
click.secho(
|
482
|
+
f"# Unicode error in {file}: {ex}", err=True, fg="yellow"
|
483
|
+
)
|
454
484
|
continue
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
else:
|
463
|
-
# else print absolute path
|
464
|
-
path = file.resolve()
|
465
|
-
snippet, line_no = code_for_node(code, node, class_name, signatures, docs)
|
466
|
-
if replace:
|
467
|
-
replace_matches.append((file.resolve(), snippet, line_no))
|
485
|
+
try:
|
486
|
+
nodes = find_symbol_nodes(code, str(file), symbols)
|
487
|
+
except SyntaxError as ex:
|
488
|
+
if not silent:
|
489
|
+
click.secho(
|
490
|
+
f"# Syntax error in {file}: {ex}", err=True, fg="yellow"
|
491
|
+
)
|
468
492
|
continue
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
493
|
+
for node, class_name in nodes:
|
494
|
+
if not filter(node):
|
495
|
+
continue
|
496
|
+
if count or check:
|
497
|
+
num_matches += 1
|
498
|
+
if count or not signatures:
|
499
|
+
continue
|
500
|
+
# If file is within pwd, print relative path
|
501
|
+
if pwd in file.resolve().parents:
|
502
|
+
path = file.resolve().relative_to(pwd)
|
503
|
+
else:
|
504
|
+
# else print absolute path
|
505
|
+
path = file.resolve()
|
506
|
+
snippet, line_no = code_for_node(
|
507
|
+
code, node, class_name, signatures, docs
|
478
508
|
)
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
509
|
+
if replace:
|
510
|
+
replace_matches.append((file.resolve(), snippet, line_no))
|
511
|
+
continue
|
512
|
+
|
513
|
+
output_identifier_line = None
|
514
|
+
output_import_line = None
|
515
|
+
symbol_id = None
|
516
|
+
|
517
|
+
if not no_file:
|
518
|
+
bits = ["# File:", path]
|
519
|
+
if class_name:
|
520
|
+
bits.extend(["Class:", class_name])
|
521
|
+
bits.extend(["Line:", line_no])
|
522
|
+
symbol_id = "{}:{}".format(path, line_no)
|
523
|
+
output_identifier_line = " ".join(str(bit) for bit in bits)
|
524
|
+
if imports:
|
525
|
+
import_line = import_line_for_function(
|
526
|
+
node.name, path, sys_paths or directories
|
483
527
|
)
|
484
|
-
|
485
|
-
|
528
|
+
# If it's a class then output '# from x import Class' instead
|
529
|
+
if class_name:
|
530
|
+
import_line = (
|
531
|
+
import_line.split(" import ")[0] + " import " + class_name
|
532
|
+
)
|
533
|
+
symbol_id = import_line
|
534
|
+
output_import_line = "# " + import_line
|
535
|
+
|
536
|
+
yield Output(
|
537
|
+
symbol_id, output_identifier_line, output_import_line, snippet
|
538
|
+
)
|
539
|
+
|
540
|
+
if sum(output_formats) == 0:
|
541
|
+
for item in stuff_to_output():
|
542
|
+
if item.output_identifier_line:
|
543
|
+
click.echo(item.output_identifier_line)
|
544
|
+
if item.output_import_line:
|
545
|
+
click.echo(item.output_import_line)
|
546
|
+
click.echo(item.snippet)
|
486
547
|
click.echo()
|
548
|
+
else:
|
549
|
+
# Do the fancy output formats thing
|
550
|
+
to_output(
|
551
|
+
sys.stdout,
|
552
|
+
((id_prefix + item.symbol_id, item.snippet) for item in stuff_to_output()),
|
553
|
+
format="csv" if csv_ else "tsv" if tsv else "json" if json_ else "nl",
|
554
|
+
)
|
555
|
+
return
|
556
|
+
|
487
557
|
if count:
|
488
558
|
click.echo(num_matches)
|
489
559
|
|
@@ -531,13 +601,40 @@ def cli(
|
|
531
601
|
filepath.write_text(new, "utf-8")
|
532
602
|
|
533
603
|
|
534
|
-
def is_subpath(path: pathlib.Path, parent: pathlib.Path) -> bool:
|
535
|
-
try:
|
536
|
-
path.relative_to(parent)
|
537
|
-
return True
|
538
|
-
except ValueError:
|
539
|
-
return False
|
540
|
-
|
541
|
-
|
542
604
|
def is_dunder(name):
    """
    Return True if name is a "dunder" name such as ``__init__``.

    A dunder name starts and ends with a double underscore and has at least
    one character in between. The length check stops the prefix and suffix
    from overlapping, so strings made only of underscores ("__", "___",
    "____") are not reported as dunder names.
    """
    return len(name) > 4 and name.startswith("__") and name.endswith("__")
|
606
|
+
|
607
|
+
|
608
|
+
def to_output(
    fp: TextIO,
    lines: Iterable[Tuple[str, str]],
    format: Literal["csv", "tsv", "json", "nl"] = "csv",
) -> None:
    """
    Write (id, code) pairs to fp in a structured format.

    fp: text stream to write to.
    lines: iterable of (id, code) tuples; consumed lazily, one pair at a time.
    format: one of
        "nl"   - one JSON object {"id": ..., "code": ...} per line
        "json" - a single JSON array of those objects
        "csv"  - CSV (csv "excel" dialect) with an "id,code" header row
        "tsv"  - TSV (csv "excel-tab" dialect) with an "id<TAB>code" header row

    Raises ValueError if format is not one of the four supported values. The
    check runs before anything is written or consumed, so a typo cannot
    silently produce TSV output.
    """
    if format not in ("csv", "tsv", "json", "nl"):
        raise ValueError(
            "format must be one of 'csv', 'tsv', 'json' or 'nl', not {!r}".format(
                format
            )
        )

    if format == "nl":
        for symbol_id, content in lines:
            fp.write(json.dumps({"id": symbol_id, "code": content}) + "\n")
        return

    if format == "json":
        # Stream the array one element at a time rather than building the
        # whole list in memory; every element after the first is preceded by
        # a comma and a newline.
        fp.write("[")
        for index, (symbol_id, content) in enumerate(lines):
            encoded = json.dumps({"id": symbol_id, "code": content})
            fp.write(encoded if index == 0 else ",\n " + encoded)
        fp.write("]\n")
        return

    dialect = "excel" if format == "csv" else "excel-tab"
    writer = csv.writer(fp, dialect=dialect)

    # Header row, then one row per symbol
    writer.writerow(["id", "code"])
    for symbol_id, content in lines:
        writer.writerow([symbol_id, content])
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.2
|
2
2
|
Name: symbex
|
3
|
-
Version: 1.3.1
|
3
|
+
Version: 1.4.1
|
4
4
|
Summary: Find the Python code for specified symbols
|
5
5
|
Home-page: https://github.com/simonw/symbex
|
6
6
|
Author: Simon Willison
|
@@ -13,13 +13,23 @@ Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
14
14
|
Requires-Dist: click
|
15
15
|
Provides-Extra: test
|
16
|
-
Requires-Dist: pytest
|
17
|
-
Requires-Dist: pytest-icdiff
|
18
|
-
Requires-Dist: cogapp
|
19
|
-
Requires-Dist: PyYAML
|
20
|
-
Requires-Dist: ruff
|
21
|
-
|
22
|
-
|
16
|
+
Requires-Dist: pytest; extra == "test"
|
17
|
+
Requires-Dist: pytest-icdiff; extra == "test"
|
18
|
+
Requires-Dist: cogapp; extra == "test"
|
19
|
+
Requires-Dist: PyYAML; extra == "test"
|
20
|
+
Requires-Dist: ruff; extra == "test"
|
21
|
+
Dynamic: author
|
22
|
+
Dynamic: description
|
23
|
+
Dynamic: description-content-type
|
24
|
+
Dynamic: home-page
|
25
|
+
Dynamic: license
|
26
|
+
Dynamic: project-url
|
27
|
+
Dynamic: provides-extra
|
28
|
+
Dynamic: requires-dist
|
29
|
+
Dynamic: requires-python
|
30
|
+
Dynamic: summary
|
31
|
+
|
32
|
+
# Symbex
|
23
33
|
|
24
34
|
[PyPI](https://pypi.org/project/symbex/)
|
25
35
|
[Changelog](https://github.com/simonw/symbex/releases)
|
@@ -28,7 +38,7 @@ Requires-Dist: ruff ; extra == 'test'
|
|
28
38
|
|
29
39
|
Find the Python code for specified symbols
|
30
40
|
|
31
|
-
Read [
|
41
|
+
Read [Symbex: search Python code for functions and classes, then pipe them into a LLM](https://simonwillison.net/2023/Jun/18/symbex/) for background on this project.
|
32
42
|
|
33
43
|
## Installation
|
34
44
|
|
@@ -313,6 +323,42 @@ Or to count every async test function:
|
|
313
323
|
```bash
|
314
324
|
symbex --async 'test_*' --count
|
315
325
|
```
|
326
|
+
## Structured output
|
327
|
+
|
328
|
+
Symbex defaults to outputting plain text (actually valid Python code, thanks to the way it uses comments).
|
329
|
+
|
330
|
+
You can request output in CSV, TSV, JSON or newline-delimited JSON instead, using the following options:
|
331
|
+
|
332
|
+
- `--json`: a JSON array, `[{"id": "...", "code": "..."}]`
|
333
|
+
- `--nl`: newline-delimited JSON, `{"id": "...", "code": "..."}` per line
|
334
|
+
- `--csv`: CSV with `id,code` as the heading row
|
335
|
+
- `--tsv`: TSV with `id\tcode` as the heading row
|
336
|
+
|
337
|
+
In each case the ID will be the path to the file containing the symbol, followed by a colon, followed by the line number of the symbol, for example:
|
338
|
+
|
339
|
+
```json
|
340
|
+
{
|
341
|
+
"id": "symbex/lib.py:82",
|
342
|
+
"code": "def match(name: str, symbols: Iterable[str]) -> bool:"
|
343
|
+
}
|
344
|
+
```
|
345
|
+
If you pass `-i/--imports` the ID will be the import line instead:
|
346
|
+
```json
|
347
|
+
{
|
348
|
+
"id": "from symbex.lib import match",
|
349
|
+
"code": "def match(name: str, symbols: Iterable[str]) -> bool:"
|
350
|
+
}
|
351
|
+
```
|
352
|
+
Pass `--id-prefix 'something:'` to add the specified prefix to the start of each ID.
|
353
|
+
|
354
|
+
This example will generate a CSV file of all of your test functions, using the import style of IDs and a prefix of `test:`:
|
355
|
+
|
356
|
+
```bash
|
357
|
+
symbex 'test_*' \
|
358
|
+
--function \
|
359
|
+
--imports \
|
360
|
+
--id-prefix 'test:' --csv > tests.csv
|
361
|
+
```
|
316
362
|
|
317
363
|
## Using with LLM
|
318
364
|
|
@@ -329,6 +375,20 @@ And got back this:
|
|
329
375
|
|
330
376
|
> This code defines a custom `Response` class with methods for returning HTTP responses. It includes methods for setting cookies, returning HTML, text, and JSON responses, and redirecting to a different URL. The `asgi_send` method sends the response to the client using the ASGI (Asynchronous Server Gateway Interface) protocol.
|
331
377
|
|
378
|
+
The structured output feature is designed to be used with [LLM embeddings](https://llm.datasette.io/en/stable/embeddings/index.html). You can generate embeddings for every symbol in your codebase using [llm embed-multi](https://llm.datasette.io/en/stable/embeddings/cli.html#llm-embed-multi) like this:
|
379
|
+
|
380
|
+
```bash
|
381
|
+
symbex '*' '*:*' --nl | \
|
382
|
+
llm embed-multi symbols - \
|
383
|
+
--format nl --database embeddings.db --store
|
384
|
+
```
|
385
|
+
This creates a database in `embeddings.db` containing all of your symbols along with embedding vectors.
|
386
|
+
|
387
|
+
You can then search your code like this:
|
388
|
+
```bash
|
389
|
+
llm similar symbols -d embeddings.db -c 'test csv' | jq
|
390
|
+
```
|
391
|
+
|
332
392
|
## Replacing a matched symbol
|
333
393
|
|
334
394
|
The `--replace` option can be used to replace a single matched symbol with content piped in to standard input.
|
@@ -525,6 +585,11 @@ Options:
|
|
525
585
|
--check Exit with non-zero code if any matches found
|
526
586
|
--replace Replace matching symbol with text from stdin
|
527
587
|
--rexec TEXT Replace with the result of piping to this tool
|
588
|
+
--csv Output as CSV
|
589
|
+
--tsv Output as TSV
|
590
|
+
--json Output as JSON
|
591
|
+
--nl Output as newline-delimited JSON
|
592
|
+
--id-prefix TEXT Prefix to use for symbol IDs
|
528
593
|
--help Show this message and exit.
|
529
594
|
|
530
595
|
```
|
@@ -0,0 +1,10 @@
|
|
1
|
+
symbex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
symbex/__main__.py,sha256=8hDtWlaFZK24KhfNq_ZKgtXqYHsDQDetukOCMlsbW0Q,59
|
3
|
+
symbex/cli.py,sha256=kGCltpO79yjVpXEerJlae5Sc22kOp3evy08dy0SZcU0,18096
|
4
|
+
symbex/lib.py,sha256=CiKKOOyc6Ne_7igzNItMZpa5I6o12LEbIPrQU9al7Ro,11436
|
5
|
+
symbex-1.4.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
6
|
+
symbex-1.4.1.dist-info/METADATA,sha256=vUyXJ1kEdUDfM_ye3sE7LDqjCcHfEavgEfEzED9vpbM,21564
|
7
|
+
symbex-1.4.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
8
|
+
symbex-1.4.1.dist-info/entry_points.txt,sha256=YgMSEfEGqNMHM9RysFObH8lkQKVZKyymKLnXbVue_Uk,42
|
9
|
+
symbex-1.4.1.dist-info/top_level.txt,sha256=qwle8HjAaYgpdMIHlJcTcN4gaG4wmDqUvkt54beTBTs,7
|
10
|
+
symbex-1.4.1.dist-info/RECORD,,
|
symbex-1.3.1.dist-info/RECORD
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
symbex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
symbex/__main__.py,sha256=8hDtWlaFZK24KhfNq_ZKgtXqYHsDQDetukOCMlsbW0Q,59
|
3
|
-
symbex/cli.py,sha256=uuP28l7qAWWPE3b0u2HLM1ID89m402EBNgYOhcejdgw,14974
|
4
|
-
symbex/lib.py,sha256=CiKKOOyc6Ne_7igzNItMZpa5I6o12LEbIPrQU9al7Ro,11436
|
5
|
-
symbex-1.3.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
6
|
-
symbex-1.3.1.dist-info/METADATA,sha256=1oJBLr7_LdzFltBb5cMMCttPFo-XZhFGOzK_1MOQ9ls,19263
|
7
|
-
symbex-1.3.1.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
8
|
-
symbex-1.3.1.dist-info/entry_points.txt,sha256=YgMSEfEGqNMHM9RysFObH8lkQKVZKyymKLnXbVue_Uk,42
|
9
|
-
symbex-1.3.1.dist-info/top_level.txt,sha256=qwle8HjAaYgpdMIHlJcTcN4gaG4wmDqUvkt54beTBTs,7
|
10
|
-
symbex-1.3.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|