symbex 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
symbex/cli.py CHANGED
@@ -1,11 +1,15 @@
1
1
  import ast
2
2
  import click
3
+ import csv
4
+ import dataclasses
3
5
  import importlib
4
6
  import inspect
7
+ import json
5
8
  import pathlib
6
9
  import site
7
10
  import subprocess
8
11
  import sys
12
+ from typing import TextIO, Iterable, Literal, Tuple
9
13
 
10
14
  from .lib import (
11
15
  code_for_node,
@@ -16,6 +20,14 @@ from .lib import (
16
20
  )
17
21
 
18
22
 
23
+ @dataclasses.dataclass
24
+ class Output:
25
+ symbol_id: str
26
+ output_identifier_line: str
27
+ output_import_line: str
28
+ snippet: str
29
+
30
+
19
31
  @click.command()
20
32
  @click.version_option()
21
33
  @click.argument("symbols", nargs=-1)
@@ -169,6 +181,12 @@ from .lib import (
169
181
  help="Replace matching symbol with text from stdin",
170
182
  )
171
183
  @click.option("--rexec", help="Replace with the result of piping to this tool")
184
+ # Output options
185
+ @click.option("csv_", "--csv", is_flag=True, help="Output as CSV")
186
+ @click.option("--tsv", is_flag=True, help="Output as TSV")
187
+ @click.option("json_", "--json", is_flag=True, help="Output as JSON")
188
+ @click.option("--nl", is_flag=True, help="Output as newline-delimited JSON")
189
+ @click.option("--id-prefix", help="Prefix to use for symbol IDs")
172
190
  def cli(
173
191
  symbols,
174
192
  files,
@@ -200,6 +218,11 @@ def cli(
200
218
  check,
201
219
  replace,
202
220
  rexec,
221
+ csv_,
222
+ tsv,
223
+ json_,
224
+ nl,
225
+ id_prefix,
203
226
  ):
204
227
  """
205
228
  Find symbols in Python code and print the code for them.
@@ -258,6 +281,17 @@ def cli(
258
281
  symbex first_function --rexec "sed 's/^/# /'"
259
282
  # This uses sed to comment out the function body
260
283
  """
284
+ # Only one of --json, --csv, --tsv, --nl
285
+ output_formats = [csv_, tsv, json_, nl]
286
+ if sum(output_formats) > 1:
287
+ raise click.ClickException("Only one of --csv, --tsv, --json, --nl can be used")
288
+ if id_prefix and not sum(output_formats):
289
+ raise click.ClickException(
290
+ "--id-prefix can only be used with --csv, --tsv, --json or --nl"
291
+ )
292
+ if id_prefix is None:
293
+ id_prefix = ""
294
+
261
295
  if modules:
262
296
  module_dirs = []
263
297
  module_files = []
@@ -362,7 +396,7 @@ def cli(
362
396
  for directory in directories:
363
397
  for path in pathlib.Path(directory).rglob("*.py"):
364
398
  # Skip if path is inside any of 'excludes'
365
- if any(is_subpath(path, exclude) for exclude in excludes):
399
+ if any(path.resolve().is_relative_to(exclude) for exclude in excludes):
366
400
  continue
367
401
  if path.is_file():
368
402
  yield path
@@ -436,54 +470,90 @@ def cli(
436
470
  pwd = pathlib.Path(".").resolve()
437
471
  num_matches = 0
438
472
  replace_matches = []
439
- for file in iterate_files():
440
- try:
441
- code = read_file(file)
442
- except UnicodeDecodeError as ex:
443
- if not silent:
444
- click.secho(f"# Unicode error in {file}: {ex}", err=True, fg="yellow")
445
- continue
446
- try:
447
- nodes = find_symbol_nodes(code, str(file), symbols)
448
- except SyntaxError as ex:
449
- if not silent:
450
- click.secho(f"# Syntax error in {file}: {ex}", err=True, fg="yellow")
451
- continue
452
- for node, class_name in nodes:
453
- if not filter(node):
473
+
474
+ def stuff_to_output():
475
+ nonlocal num_matches
476
+ for file in iterate_files():
477
+ try:
478
+ code = read_file(file)
479
+ except UnicodeDecodeError as ex:
480
+ if not silent:
481
+ click.secho(
482
+ f"# Unicode error in {file}: {ex}", err=True, fg="yellow"
483
+ )
454
484
  continue
455
- if count or check:
456
- num_matches += 1
457
- if count or not signatures:
458
- continue
459
- # If file is within pwd, print relative path
460
- if pwd in file.resolve().parents:
461
- path = file.resolve().relative_to(pwd)
462
- else:
463
- # else print absolute path
464
- path = file.resolve()
465
- snippet, line_no = code_for_node(code, node, class_name, signatures, docs)
466
- if replace:
467
- replace_matches.append((file.resolve(), snippet, line_no))
485
+ try:
486
+ nodes = find_symbol_nodes(code, str(file), symbols)
487
+ except SyntaxError as ex:
488
+ if not silent:
489
+ click.secho(
490
+ f"# Syntax error in {file}: {ex}", err=True, fg="yellow"
491
+ )
468
492
  continue
469
- if not no_file:
470
- bits = ["# File:", path]
471
- if class_name:
472
- bits.extend(["Class:", class_name])
473
- bits.extend(["Line:", line_no])
474
- click.echo(" ".join(str(bit) for bit in bits))
475
- if imports:
476
- import_line = import_line_for_function(
477
- node.name, path, sys_paths or directories
493
+ for node, class_name in nodes:
494
+ if not filter(node):
495
+ continue
496
+ if count or check:
497
+ num_matches += 1
498
+ if count or not signatures:
499
+ continue
500
+ # If file is within pwd, print relative path
501
+ if pwd in file.resolve().parents:
502
+ path = file.resolve().relative_to(pwd)
503
+ else:
504
+ # else print absolute path
505
+ path = file.resolve()
506
+ snippet, line_no = code_for_node(
507
+ code, node, class_name, signatures, docs
478
508
  )
479
- # If it's a class then output '# from x import Class' instead
480
- if class_name:
481
- import_line = (
482
- import_line.split(" import ")[0] + " import " + class_name
509
+ if replace:
510
+ replace_matches.append((file.resolve(), snippet, line_no))
511
+ continue
512
+
513
+ output_identifier_line = None
514
+ output_import_line = None
515
+ symbol_id = None
516
+
517
+ if not no_file:
518
+ bits = ["# File:", path]
519
+ if class_name:
520
+ bits.extend(["Class:", class_name])
521
+ bits.extend(["Line:", line_no])
522
+ symbol_id = "{}:{}".format(path, line_no)
523
+ output_identifier_line = " ".join(str(bit) for bit in bits)
524
+ if imports:
525
+ import_line = import_line_for_function(
526
+ node.name, path, sys_paths or directories
483
527
  )
484
- click.echo("# " + import_line)
485
- click.echo(snippet)
528
+ # If it's a class then output '# from x import Class' instead
529
+ if class_name:
530
+ import_line = (
531
+ import_line.split(" import ")[0] + " import " + class_name
532
+ )
533
+ symbol_id = import_line
534
+ output_import_line = "# " + import_line
535
+
536
+ yield Output(
537
+ symbol_id, output_identifier_line, output_import_line, snippet
538
+ )
539
+
540
+ if sum(output_formats) == 0:
541
+ for item in stuff_to_output():
542
+ if item.output_identifier_line:
543
+ click.echo(item.output_identifier_line)
544
+ if item.output_import_line:
545
+ click.echo(item.output_import_line)
546
+ click.echo(item.snippet)
486
547
  click.echo()
548
+ else:
549
+ # Do the fancy output formats thing
550
+ to_output(
551
+ sys.stdout,
552
+ ((id_prefix + item.symbol_id, item.snippet) for item in stuff_to_output()),
553
+ format="csv" if csv_ else "tsv" if tsv else "json" if json_ else "nl",
554
+ )
555
+ return
556
+
487
557
  if count:
488
558
  click.echo(num_matches)
489
559
 
@@ -531,13 +601,40 @@ def cli(
531
601
  filepath.write_text(new, "utf-8")
532
602
 
533
603
 
534
- def is_subpath(path: pathlib.Path, parent: pathlib.Path) -> bool:
535
- try:
536
- path.relative_to(parent)
537
- return True
538
- except ValueError:
539
- return False
540
-
541
-
542
604
  def is_dunder(name):
543
605
  return name.startswith("__") and name.endswith("__")
606
+
607
+
608
+ def to_output(
609
+ fp: TextIO,
610
+ lines: Iterable[Tuple[str, str]],
611
+ format: Literal["csv", "tsv", "json", "nl"] = "csv",
612
+ ) -> None:
613
+ if format == "nl":
614
+ for id, content in lines:
615
+ line = json.dumps({"id": id, "code": content})
616
+ fp.write(line + "\n")
617
+ return
618
+
619
+ elif format == "json":
620
+ fp.write("[")
621
+ first = True
622
+ for id, content in lines:
623
+ line = json.dumps({"id": id, "code": content})
624
+ if first:
625
+ fp.write(line)
626
+ first = False
627
+ else:
628
+ fp.write(",\n " + line)
629
+ fp.write("]\n")
630
+ return
631
+
632
+ dialect = "excel" if format == "csv" else "excel-tab"
633
+ writer = csv.writer(fp, dialect=dialect)
634
+
635
+ # Write header
636
+ writer.writerow(["id", "code"])
637
+
638
+ # Write content
639
+ for id, content in lines:
640
+ writer.writerow([id, content])
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: symbex
3
- Version: 1.3.1
3
+ Version: 1.4.1
4
4
  Summary: Find the Python code for specified symbols
5
5
  Home-page: https://github.com/simonw/symbex
6
6
  Author: Simon Willison
@@ -13,13 +13,23 @@ Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
14
  Requires-Dist: click
15
15
  Provides-Extra: test
16
- Requires-Dist: pytest ; extra == 'test'
17
- Requires-Dist: pytest-icdiff ; extra == 'test'
18
- Requires-Dist: cogapp ; extra == 'test'
19
- Requires-Dist: PyYAML ; extra == 'test'
20
- Requires-Dist: ruff ; extra == 'test'
21
-
22
- # symbex
16
+ Requires-Dist: pytest; extra == "test"
17
+ Requires-Dist: pytest-icdiff; extra == "test"
18
+ Requires-Dist: cogapp; extra == "test"
19
+ Requires-Dist: PyYAML; extra == "test"
20
+ Requires-Dist: ruff; extra == "test"
21
+ Dynamic: author
22
+ Dynamic: description
23
+ Dynamic: description-content-type
24
+ Dynamic: home-page
25
+ Dynamic: license
26
+ Dynamic: project-url
27
+ Dynamic: provides-extra
28
+ Dynamic: requires-dist
29
+ Dynamic: requires-python
30
+ Dynamic: summary
31
+
32
+ # Symbex
23
33
 
24
34
  [![PyPI](https://img.shields.io/pypi/v/symbex.svg)](https://pypi.org/project/symbex/)
25
35
  [![Changelog](https://img.shields.io/github/v/release/simonw/symbex?include_prereleases&label=changelog)](https://github.com/simonw/symbex/releases)
@@ -28,7 +38,7 @@ Requires-Dist: ruff ; extra == 'test'
28
38
 
29
39
  Find the Python code for specified symbols
30
40
 
31
- Read [symbex: search Python code for functions and classes, then pipe them into a LLM](https://simonwillison.net/2023/Jun/18/symbex/) for background on this project.
41
+ Read [Symbex: search Python code for functions and classes, then pipe them into a LLM](https://simonwillison.net/2023/Jun/18/symbex/) for background on this project.
32
42
 
33
43
  ## Installation
34
44
 
@@ -313,6 +323,42 @@ Or to count every async test function:
313
323
  ```bash
314
324
  symbex --async 'test_*' --count
315
325
  ```
326
+ ## Structured output
327
+
328
+ Symbex defaults to outputting plain text (actually valid Python code, thanks to the way it uses comments).
329
+
330
+ You can request output in CSV, TSV, JSON or newline-delimited JSON instead, using the following options:
331
+
332
+ - `--json`: a JSON array, `[{"id": "...", "code": "..."}]`
333
+ - `--nl`: newline-delimited JSON, `{"id": "...", "code": "..."}` per line
334
+ - `--csv`: CSV with `id,code` as the heading row
335
+ - `--tsv`: TSV with `id\tcode` as the heading row
336
+
337
+ In each case the ID will be the path to the file containing the symbol, followed by a colon, followed by the line number of the symbol, for example:
338
+
339
+ ```json
340
+ {
341
+ "id": "symbex/lib.py:82",
342
+ "code": "def match(name: str, symbols: Iterable[str]) -> bool:"
343
+ }
344
+ ```
345
+ If you pass `-i/--imports` the ID will be the import line instead:
346
+ ```json
347
+ {
348
+ "id": "from symbex.lib import match",
349
+ "code": "def match(name: str, symbols: Iterable[str]) -> bool:"
350
+ }
351
+ ```
352
+ Pass `--id-prefix 'something:'` to add the specified prefix to the start of each ID.
353
+
354
+ This example will generate a CSV file of all of your test functions, using the import style of IDs and a prefix of `test:`:
355
+
356
+ ```bash
357
+ symbex 'test_*' \
358
+ --function \
359
+ --imports --id-prefix 'test:' \
360
+ --csv > tests.csv
361
+ ```
316
362
 
317
363
  ## Using with LLM
318
364
 
@@ -329,6 +375,20 @@ And got back this:
329
375
 
330
376
  > This code defines a custom `Response` class with methods for returning HTTP responses. It includes methods for setting cookies, returning HTML, text, and JSON responses, and redirecting to a different URL. The `asgi_send` method sends the response to the client using the ASGI (Asynchronous Server Gateway Interface) protocol.
331
377
 
378
+ The structured output feature is designed to be used with [LLM embeddings](https://llm.datasette.io/en/stable/embeddings/index.html). You can generate embeddings for every symbol in your codebase using [llm embed-multi](https://llm.datasette.io/en/stable/embeddings/cli.html#llm-embed-multi) like this:
379
+
380
+ ```bash
381
+ symbex '*' '*:*' --nl | \
382
+ llm embed-multi symbols - \
383
+ --format nl --database embeddings.db --store
384
+ ```
385
+ This creates a database in `embeddings.db` containing all of your symbols along with embedding vectors.
386
+
387
+ You can then search your code like this:
388
+ ```bash
389
+ llm similar symbols -d embeddings.db -c 'test csv' | jq
390
+ ```
391
+
332
392
  ## Replacing a matched symbol
333
393
 
334
394
  The `--replace` option can be used to replace a single matched symbol with content piped in to standard input.
@@ -525,6 +585,11 @@ Options:
525
585
  --check Exit with non-zero code if any matches found
526
586
  --replace Replace matching symbol with text from stdin
527
587
  --rexec TEXT Replace with the result of piping to this tool
588
+ --csv Output as CSV
589
+ --tsv Output as TSV
590
+ --json Output as JSON
591
+ --nl Output as newline-delimited JSON
592
+ --id-prefix TEXT Prefix to use for symbol IDs
528
593
  --help Show this message and exit.
529
594
 
530
595
  ```
@@ -0,0 +1,10 @@
1
+ symbex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ symbex/__main__.py,sha256=8hDtWlaFZK24KhfNq_ZKgtXqYHsDQDetukOCMlsbW0Q,59
3
+ symbex/cli.py,sha256=kGCltpO79yjVpXEerJlae5Sc22kOp3evy08dy0SZcU0,18096
4
+ symbex/lib.py,sha256=CiKKOOyc6Ne_7igzNItMZpa5I6o12LEbIPrQU9al7Ro,11436
5
+ symbex-1.4.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
6
+ symbex-1.4.1.dist-info/METADATA,sha256=vUyXJ1kEdUDfM_ye3sE7LDqjCcHfEavgEfEzED9vpbM,21564
7
+ symbex-1.4.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
8
+ symbex-1.4.1.dist-info/entry_points.txt,sha256=YgMSEfEGqNMHM9RysFObH8lkQKVZKyymKLnXbVue_Uk,42
9
+ symbex-1.4.1.dist-info/top_level.txt,sha256=qwle8HjAaYgpdMIHlJcTcN4gaG4wmDqUvkt54beTBTs,7
10
+ symbex-1.4.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.2)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- symbex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- symbex/__main__.py,sha256=8hDtWlaFZK24KhfNq_ZKgtXqYHsDQDetukOCMlsbW0Q,59
3
- symbex/cli.py,sha256=uuP28l7qAWWPE3b0u2HLM1ID89m402EBNgYOhcejdgw,14974
4
- symbex/lib.py,sha256=CiKKOOyc6Ne_7igzNItMZpa5I6o12LEbIPrQU9al7Ro,11436
5
- symbex-1.3.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
6
- symbex-1.3.1.dist-info/METADATA,sha256=1oJBLr7_LdzFltBb5cMMCttPFo-XZhFGOzK_1MOQ9ls,19263
7
- symbex-1.3.1.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
8
- symbex-1.3.1.dist-info/entry_points.txt,sha256=YgMSEfEGqNMHM9RysFObH8lkQKVZKyymKLnXbVue_Uk,42
9
- symbex-1.3.1.dist-info/top_level.txt,sha256=qwle8HjAaYgpdMIHlJcTcN4gaG4wmDqUvkt54beTBTs,7
10
- symbex-1.3.1.dist-info/RECORD,,