symbex 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
symbex/cli.py CHANGED
@@ -1,11 +1,15 @@
1
1
  import ast
2
2
  import click
3
+ import csv
4
+ import dataclasses
3
5
  import importlib
4
6
  import inspect
7
+ import json
5
8
  import pathlib
6
9
  import site
7
10
  import subprocess
8
11
  import sys
12
+ from typing import TextIO, Iterable, Literal, Tuple
9
13
 
10
14
  from .lib import (
11
15
  code_for_node,
@@ -16,6 +20,14 @@ from .lib import (
16
20
  )
17
21
 
18
22
 
23
+ @dataclasses.dataclass
24
+ class Output:
25
+ symbol_id: str
26
+ output_identifier_line: str
27
+ output_import_line: str
28
+ snippet: str
29
+
30
+
19
31
  @click.command()
20
32
  @click.version_option()
21
33
  @click.argument("symbols", nargs=-1)
@@ -169,6 +181,12 @@ from .lib import (
169
181
  help="Replace matching symbol with text from stdin",
170
182
  )
171
183
  @click.option("--rexec", help="Replace with the result of piping to this tool")
184
+ # Output options
185
+ @click.option("csv_", "--csv", is_flag=True, help="Output as CSV")
186
+ @click.option("--tsv", is_flag=True, help="Output as TSV")
187
+ @click.option("json_", "--json", is_flag=True, help="Output as JSON")
188
+ @click.option("--nl", is_flag=True, help="Output as newline-delimited JSON")
189
+ @click.option("--id-prefix", help="Prefix to use for symbol IDs")
172
190
  def cli(
173
191
  symbols,
174
192
  files,
@@ -200,6 +218,11 @@ def cli(
200
218
  check,
201
219
  replace,
202
220
  rexec,
221
+ csv_,
222
+ tsv,
223
+ json_,
224
+ nl,
225
+ id_prefix,
203
226
  ):
204
227
  """
205
228
  Find symbols in Python code and print the code for them.
@@ -258,6 +281,17 @@ def cli(
258
281
  symbex first_function --rexec "sed 's/^/# /'"
259
282
  # This uses sed to comment out the function body
260
283
  """
284
+ # Only one of --json, --csv, --tsv, --nl
285
+ output_formats = [csv_, tsv, json_, nl]
286
+ if sum(output_formats) > 1:
287
+ raise click.ClickException("Only one of --csv, --tsv, --json, --nl can be used")
288
+ if id_prefix and not sum(output_formats):
289
+ raise click.ClickException(
290
+ "--id-prefix can only be used with --csv, --tsv, --json or --nl"
291
+ )
292
+ if id_prefix is None:
293
+ id_prefix = ""
294
+
261
295
  if modules:
262
296
  module_dirs = []
263
297
  module_files = []
@@ -362,7 +396,7 @@ def cli(
362
396
  for directory in directories:
363
397
  for path in pathlib.Path(directory).rglob("*.py"):
364
398
  # Skip if path is inside any of 'excludes'
365
- if any(is_subpath(path, exclude) for exclude in excludes):
399
+ if any(path.resolve().is_relative_to(exclude) for exclude in excludes):
366
400
  continue
367
401
  if path.is_file():
368
402
  yield path
@@ -436,54 +470,90 @@ def cli(
436
470
  pwd = pathlib.Path(".").resolve()
437
471
  num_matches = 0
438
472
  replace_matches = []
439
- for file in iterate_files():
440
- try:
441
- code = read_file(file)
442
- except UnicodeDecodeError as ex:
443
- if not silent:
444
- click.secho(f"# Unicode error in {file}: {ex}", err=True, fg="yellow")
445
- continue
446
- try:
447
- nodes = find_symbol_nodes(code, str(file), symbols)
448
- except SyntaxError as ex:
449
- if not silent:
450
- click.secho(f"# Syntax error in {file}: {ex}", err=True, fg="yellow")
451
- continue
452
- for node, class_name in nodes:
453
- if not filter(node):
473
+
474
+ def stuff_to_output():
475
+ nonlocal num_matches
476
+ for file in iterate_files():
477
+ try:
478
+ code = read_file(file)
479
+ except UnicodeDecodeError as ex:
480
+ if not silent:
481
+ click.secho(
482
+ f"# Unicode error in {file}: {ex}", err=True, fg="yellow"
483
+ )
454
484
  continue
455
- if count or check:
456
- num_matches += 1
457
- if count or not signatures:
458
- continue
459
- # If file is within pwd, print relative path
460
- if pwd in file.resolve().parents:
461
- path = file.resolve().relative_to(pwd)
462
- else:
463
- # else print absolute path
464
- path = file.resolve()
465
- snippet, line_no = code_for_node(code, node, class_name, signatures, docs)
466
- if replace:
467
- replace_matches.append((file.resolve(), snippet, line_no))
485
+ try:
486
+ nodes = find_symbol_nodes(code, str(file), symbols)
487
+ except SyntaxError as ex:
488
+ if not silent:
489
+ click.secho(
490
+ f"# Syntax error in {file}: {ex}", err=True, fg="yellow"
491
+ )
468
492
  continue
469
- if not no_file:
470
- bits = ["# File:", path]
471
- if class_name:
472
- bits.extend(["Class:", class_name])
473
- bits.extend(["Line:", line_no])
474
- click.echo(" ".join(str(bit) for bit in bits))
475
- if imports:
476
- import_line = import_line_for_function(
477
- node.name, path, sys_paths or directories
493
+ for node, class_name in nodes:
494
+ if not filter(node):
495
+ continue
496
+ if count or check:
497
+ num_matches += 1
498
+ if count or not signatures:
499
+ continue
500
+ # If file is within pwd, print relative path
501
+ if pwd in file.resolve().parents:
502
+ path = file.resolve().relative_to(pwd)
503
+ else:
504
+ # else print absolute path
505
+ path = file.resolve()
506
+ snippet, line_no = code_for_node(
507
+ code, node, class_name, signatures, docs
478
508
  )
479
- # If it's a class then output '# from x import Class' instead
480
- if class_name:
481
- import_line = (
482
- import_line.split(" import ")[0] + " import " + class_name
509
+ if replace:
510
+ replace_matches.append((file.resolve(), snippet, line_no))
511
+ continue
512
+
513
+ output_identifier_line = None
514
+ output_import_line = None
515
+ symbol_id = None
516
+
517
+ if not no_file:
518
+ bits = ["# File:", path]
519
+ if class_name:
520
+ bits.extend(["Class:", class_name])
521
+ bits.extend(["Line:", line_no])
522
+ symbol_id = "{}:{}".format(path, line_no)
523
+ output_identifier_line = " ".join(str(bit) for bit in bits)
524
+ if imports:
525
+ import_line = import_line_for_function(
526
+ node.name, path, sys_paths or directories
483
527
  )
484
- click.echo("# " + import_line)
485
- click.echo(snippet)
528
+ # If it's a class then output '# from x import Class' instead
529
+ if class_name:
530
+ import_line = (
531
+ import_line.split(" import ")[0] + " import " + class_name
532
+ )
533
+ symbol_id = import_line
534
+ output_import_line = "# " + import_line
535
+
536
+ yield Output(
537
+ symbol_id, output_identifier_line, output_import_line, snippet
538
+ )
539
+
540
+ if sum(output_formats) == 0:
541
+ for item in stuff_to_output():
542
+ if item.output_identifier_line:
543
+ click.echo(item.output_identifier_line)
544
+ if item.output_import_line:
545
+ click.echo(item.output_import_line)
546
+ click.echo(item.snippet)
486
547
  click.echo()
548
+ else:
549
+ # Do the fancy output formats thing
550
+ to_output(
551
+ sys.stdout,
552
+ ((id_prefix + item.symbol_id, item.snippet) for item in stuff_to_output()),
553
+ format="csv" if csv_ else "tsv" if tsv else "json" if json_ else "nl",
554
+ )
555
+ return
556
+
487
557
  if count:
488
558
  click.echo(num_matches)
489
559
 
@@ -531,13 +601,40 @@ def cli(
531
601
  filepath.write_text(new, "utf-8")
532
602
 
533
603
 
534
- def is_subpath(path: pathlib.Path, parent: pathlib.Path) -> bool:
535
- try:
536
- path.relative_to(parent)
537
- return True
538
- except ValueError:
539
- return False
540
-
541
-
542
604
  def is_dunder(name):
543
605
  return name.startswith("__") and name.endswith("__")
606
+
607
+
608
+ def to_output(
609
+ fp: TextIO,
610
+ lines: Iterable[Tuple[str, str]],
611
+ format: Literal["csv", "tsv", "json", "nl"] = "csv",
612
+ ) -> None:
613
+ if format == "nl":
614
+ for id, content in lines:
615
+ line = json.dumps({"id": id, "code": content})
616
+ fp.write(line + "\n")
617
+ return
618
+
619
+ elif format == "json":
620
+ fp.write("[")
621
+ first = True
622
+ for id, content in lines:
623
+ line = json.dumps({"id": id, "code": content})
624
+ if first:
625
+ fp.write(line)
626
+ first = False
627
+ else:
628
+ fp.write(",\n " + line)
629
+ fp.write("]\n")
630
+ return
631
+
632
+ dialect = "excel" if format == "csv" else "excel-tab"
633
+ writer = csv.writer(fp, dialect=dialect)
634
+
635
+ # Write header
636
+ writer.writerow(["id", "code"])
637
+
638
+ # Write content
639
+ for id, content in lines:
640
+ writer.writerow([id, content])
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: symbex
3
- Version: 1.3.1
3
+ Version: 1.4.1
4
4
  Summary: Find the Python code for specified symbols
5
5
  Home-page: https://github.com/simonw/symbex
6
6
  Author: Simon Willison
@@ -13,13 +13,23 @@ Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
14
  Requires-Dist: click
15
15
  Provides-Extra: test
16
- Requires-Dist: pytest ; extra == 'test'
17
- Requires-Dist: pytest-icdiff ; extra == 'test'
18
- Requires-Dist: cogapp ; extra == 'test'
19
- Requires-Dist: PyYAML ; extra == 'test'
20
- Requires-Dist: ruff ; extra == 'test'
21
-
22
- # symbex
16
+ Requires-Dist: pytest; extra == "test"
17
+ Requires-Dist: pytest-icdiff; extra == "test"
18
+ Requires-Dist: cogapp; extra == "test"
19
+ Requires-Dist: PyYAML; extra == "test"
20
+ Requires-Dist: ruff; extra == "test"
21
+ Dynamic: author
22
+ Dynamic: description
23
+ Dynamic: description-content-type
24
+ Dynamic: home-page
25
+ Dynamic: license
26
+ Dynamic: project-url
27
+ Dynamic: provides-extra
28
+ Dynamic: requires-dist
29
+ Dynamic: requires-python
30
+ Dynamic: summary
31
+
32
+ # Symbex
23
33
 
24
34
  [![PyPI](https://img.shields.io/pypi/v/symbex.svg)](https://pypi.org/project/symbex/)
25
35
  [![Changelog](https://img.shields.io/github/v/release/simonw/symbex?include_prereleases&label=changelog)](https://github.com/simonw/symbex/releases)
@@ -28,7 +38,7 @@ Requires-Dist: ruff ; extra == 'test'
28
38
 
29
39
  Find the Python code for specified symbols
30
40
 
31
- Read [symbex: search Python code for functions and classes, then pipe them into a LLM](https://simonwillison.net/2023/Jun/18/symbex/) for background on this project.
41
+ Read [Symbex: search Python code for functions and classes, then pipe them into a LLM](https://simonwillison.net/2023/Jun/18/symbex/) for background on this project.
32
42
 
33
43
  ## Installation
34
44
 
@@ -313,6 +323,42 @@ Or to count every async test function:
313
323
  ```bash
314
324
  symbex --async 'test_*' --count
315
325
  ```
326
+ ## Structured output
327
+
328
+ Symbex defaults to outputting plain text (actually valid Python code, thanks to the way it uses comments).
329
+
330
+ You can request output in CSV, TSV, JSON or newline-delimited JSON instead, using the following options:
331
+
332
+ - `--json`: a JSON array, `[{"id": "...", "code": "..."}]`
333
+ - `--nl`: newline-delimited JSON, `{"id": "...", "code": "..."}` per line
334
+ - `--csv`: CSV with `id,code` as the heading row
335
+ - `--tsv`: TSV with `id\tcode` as the heading row
336
+
337
+ In each case the ID will be the path to the file containing the symbol, followed by a colon, followed by the line number of the symbol, for example:
338
+
339
+ ```json
340
+ {
341
+ "id": "symbex/lib.py:82",
342
+ "code": "def match(name: str, symbols: Iterable[str]) -> bool:"
343
+ }
344
+ ```
345
+ If you pass `-i/--imports` the ID will be the import line instead:
346
+ ```json
347
+ {
348
+ "id": "from symbex.lib import match",
349
+ "code": "def match(name: str, symbols: Iterable[str]) -> bool:"
350
+ }
351
+ ```
352
+ Pass `--id-prefix 'something:'` to add the specified prefix to the start of each ID.
353
+
354
+ This example will generate a CSV file of all of your test functions, using the import style of IDs and a prefix of `test:`:
355
+
356
+ ```bash
357
+ symbex 'test_*' \
358
+ --function \
359
+ --imports \
360
+ --id-prefix 'test:' --csv > tests.csv
361
+ ```
316
362
 
317
363
  ## Using with LLM
318
364
 
@@ -329,6 +375,20 @@ And got back this:
329
375
 
330
376
  > This code defines a custom `Response` class with methods for returning HTTP responses. It includes methods for setting cookies, returning HTML, text, and JSON responses, and redirecting to a different URL. The `asgi_send` method sends the response to the client using the ASGI (Asynchronous Server Gateway Interface) protocol.
331
377
 
378
+ The structured output feature is designed to be used with [LLM embeddings](https://llm.datasette.io/en/stable/embeddings/index.html). You can generate embeddings for every symbol in your codebase using [llm embed-multi](https://llm.datasette.io/en/stable/embeddings/cli.html#llm-embed-multi) like this:
379
+
380
+ ```bash
381
+ symbex '*' '*:*' --nl | \
382
+ llm embed-multi symbols - \
383
+ --format nl --database embeddings.db --store
384
+ ```
385
+ This creates a database in `embeddings.db` containing all of your symbols along with embedding vectors.
386
+
387
+ You can then search your code like this:
388
+ ```bash
389
+ llm similar symbols -d embeddings.db -c 'test csv' | jq
390
+ ```
391
+
332
392
  ## Replacing a matched symbol
333
393
 
334
394
  The `--replace` option can be used to replace a single matched symbol with content piped in to standard input.
@@ -525,6 +585,11 @@ Options:
525
585
  --check Exit with non-zero code if any matches found
526
586
  --replace Replace matching symbol with text from stdin
527
587
  --rexec TEXT Replace with the result of piping to this tool
588
+ --csv Output as CSV
589
+ --tsv Output as TSV
590
+ --json Output as JSON
591
+ --nl Output as newline-delimited JSON
592
+ --id-prefix TEXT Prefix to use for symbol IDs
528
593
  --help Show this message and exit.
529
594
 
530
595
  ```
@@ -0,0 +1,10 @@
1
+ symbex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ symbex/__main__.py,sha256=8hDtWlaFZK24KhfNq_ZKgtXqYHsDQDetukOCMlsbW0Q,59
3
+ symbex/cli.py,sha256=kGCltpO79yjVpXEerJlae5Sc22kOp3evy08dy0SZcU0,18096
4
+ symbex/lib.py,sha256=CiKKOOyc6Ne_7igzNItMZpa5I6o12LEbIPrQU9al7Ro,11436
5
+ symbex-1.4.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
6
+ symbex-1.4.1.dist-info/METADATA,sha256=vUyXJ1kEdUDfM_ye3sE7LDqjCcHfEavgEfEzED9vpbM,21564
7
+ symbex-1.4.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
8
+ symbex-1.4.1.dist-info/entry_points.txt,sha256=YgMSEfEGqNMHM9RysFObH8lkQKVZKyymKLnXbVue_Uk,42
9
+ symbex-1.4.1.dist-info/top_level.txt,sha256=qwle8HjAaYgpdMIHlJcTcN4gaG4wmDqUvkt54beTBTs,7
10
+ symbex-1.4.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.2)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- symbex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- symbex/__main__.py,sha256=8hDtWlaFZK24KhfNq_ZKgtXqYHsDQDetukOCMlsbW0Q,59
3
- symbex/cli.py,sha256=uuP28l7qAWWPE3b0u2HLM1ID89m402EBNgYOhcejdgw,14974
4
- symbex/lib.py,sha256=CiKKOOyc6Ne_7igzNItMZpa5I6o12LEbIPrQU9al7Ro,11436
5
- symbex-1.3.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
6
- symbex-1.3.1.dist-info/METADATA,sha256=1oJBLr7_LdzFltBb5cMMCttPFo-XZhFGOzK_1MOQ9ls,19263
7
- symbex-1.3.1.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
8
- symbex-1.3.1.dist-info/entry_points.txt,sha256=YgMSEfEGqNMHM9RysFObH8lkQKVZKyymKLnXbVue_Uk,42
9
- symbex-1.3.1.dist-info/top_level.txt,sha256=qwle8HjAaYgpdMIHlJcTcN4gaG4wmDqUvkt54beTBTs,7
10
- symbex-1.3.1.dist-info/RECORD,,