stata-code 0.7.1__tar.gz → 0.7.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {stata_code-0.7.1 → stata_code-0.7.2}/CHANGELOG.md +27 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/PKG-INFO +8 -5
- {stata_code-0.7.1 → stata_code-0.7.2}/README.md +7 -4
- {stata_code-0.7.1 → stata_code-0.7.2}/pyproject.toml +1 -1
- stata_code-0.7.2/scripts/build_skill_zip.py +105 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/__init__.py +1 -1
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/core/runner.py +101 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/mcp/server.py +376 -1
- stata_code-0.7.2/tests/test_new_tools.py +472 -0
- stata_code-0.7.2/tests/test_skill_package.py +65 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/.gitignore +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/LICENSE +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/LICENSE-POLICY.md +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/PUBLISHING.md +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/SCHEMA.md +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/docs/design/hard_timeout.md +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/examples/01-basic-regression.md +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/examples/02-did-card-krueger.md +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/examples/03-graphs.md +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/examples/04-multi-session.md +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/examples/05-large-matrix.md +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/examples/README.md +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/schema/run_result.schema.json +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/scripts/check_versions.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/scripts/export_schema.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/core/__init__.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/core/_pool.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/core/_refs.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/core/_runtime.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/core/errors.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/core/log_artifacts.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/core/notebook.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/core/run_index.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/core/schema.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/kernel/__init__.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/kernel/__main__.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/kernel/assets/logo-32x32.png +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/kernel/assets/logo-64x64.png +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/kernel/assets/logo-svg.svg +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/kernel/kernel.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/mcp/__init__.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/stata_code/mcp/__main__.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/__init__.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/conftest.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/fixtures/.gitkeep +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/test_cancel.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/test_errors.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/test_kernel.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/test_log_artifacts.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/test_mcp.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/test_mcp_stdio.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/test_notebook.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/test_notebook_phase2.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/test_pool.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/test_public_api.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/test_release_versions.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/test_run_index.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/test_runner.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/test_runtime_discovery.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/test_schema.py +0 -0
- {stata_code-0.7.1 → stata_code-0.7.2}/tests/test_schema_artifact.py +0 -0
|
@@ -6,6 +6,33 @@ to semver-major.minor for the result schema (see `SCHEMA.md` §6).
|
|
|
6
6
|
|
|
7
7
|
## Unreleased
|
|
8
8
|
|
|
9
|
+
## 0.7.2 — 2026-06-20
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
- **Three convenience MCP tools** raise the tool surface from 15 to 18:
|
|
14
|
+
- `install_package(name, source?, url?, replace?, session_id?)` — installs a
|
|
15
|
+
community package via `ssc install` / `net install` without the agent
|
|
16
|
+
having to remember the syntax, then verifies it resolves with `which`.
|
|
17
|
+
Package names and URLs are validated to keep them out of the generated
|
|
18
|
+
command line; failures surface the typed `error` block (e.g. `network`).
|
|
19
|
+
- `search_log(ref, pattern, is_regex?, ignore_case?, context?, max_matches?)`
|
|
20
|
+
— greps within a truncated `log://` payload and returns only the matching
|
|
21
|
+
lines (with optional context), so a long log can be inspected without
|
|
22
|
+
pulling the whole transcript back through `get_log`.
|
|
23
|
+
- `inspect_data(varlist?, detail?, session_id?)` — runs `describe` +
|
|
24
|
+
`codebook` and returns the structured `dataset` block plus the codebook
|
|
25
|
+
log: a one-call "what's in this dataset" the agent doesn't have to spell out.
|
|
26
|
+
- **On-demand Stata reference library** under `skills/stata-code/references/`
|
|
27
|
+
(~4,200 lines): topic files for core syntax, data management, econometrics,
|
|
28
|
+
causal inference, panel/time series, graphics, and table export; load-bearing
|
|
29
|
+
`error-codes.md` (the full `rc → kind → fix` table + self-repair loop, aligned
|
|
30
|
+
with the typed-error taxonomy) and `defensive-coding.md`; and per-package notes
|
|
31
|
+
for `reghdfe`, `coefplot`, `estout`, and `gtools`. `SKILL.md` gained a routing
|
|
32
|
+
table (read 1–3 files on demand) and a live-vs-offline execution-mode section.
|
|
33
|
+
- **`scripts/build_skill_zip.py`** packages the skill into a deterministic
|
|
34
|
+
`build/stata-code-skill.zip` for upload as Claude.ai project knowledge.
|
|
35
|
+
|
|
9
36
|
## 0.7.1 — 2026-06-19
|
|
10
37
|
|
|
11
38
|
### Fixed
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: stata-code
|
|
3
|
-
Version: 0.7.1
|
|
3
|
+
Version: 0.7.2
|
|
4
4
|
Summary: Agent-native Stata bridge — one core, multiple frontends (MCP, Jupyter, VSCode)
|
|
5
5
|
Project-URL: Homepage, https://github.com/brycewang-stanford/stata-code
|
|
6
6
|
Project-URL: Repository, https://github.com/brycewang-stanford/stata-code
|
|
@@ -188,7 +188,7 @@ claude mcp add stata-code --scope local -- stata-code-mcp
|
|
|
188
188
|
claude mcp add stata-code --scope project -- stata-code-mcp
|
|
189
189
|
```
|
|
190
190
|
|
|
191
|
-
Then launch `claude` and type `/mcp` to confirm `stata-code` shows up with its
|
|
191
|
+
Then launch `claude` and type `/mcp` to confirm `stata-code` shows up with its 18 tools (`stata_run`, `stata_info`, `get_log`, `search_log`, `get_graph`, `get_matrix`, `inspect_data`, `install_package`, `list_sessions`, `cancel_session`, `reset_session`, `notebook_outline`, `notebook_get_cell`, `notebook_locate`, `notebook_edit_cell`, `notebook_insert_cell`, `notebook_delete_cell`, `list_runs`).
|
|
192
192
|
|
|
193
193
|
#### Error Recovery in Agent Workflows
|
|
194
194
|
|
|
@@ -276,15 +276,18 @@ If an OpenAI-backed client reports `API Error: 400 Invalid schema for function
|
|
|
276
276
|
upgrade to `stata-code>=0.6.5`, then restart the MCP client. Older server
|
|
277
277
|
processes keep advertising the stale schema until they are restarted.
|
|
278
278
|
|
|
279
|
-
The MCP server registers 15 tools:
|
|
279
|
+
The MCP server registers 18 tools:
|
|
280
280
|
|
|
281
281
|
| Tool | Purpose |
|
|
282
282
|
| --- | --- |
|
|
283
283
|
| `stata_run` | Execute Stata code and return a v1.0 RunResult JSON |
|
|
284
284
|
| `stata_info` | Report Stata edition, version, and capabilities |
|
|
285
285
|
| `get_log` | Fetch the full log behind a `log://` ref |
|
|
286
|
+
| `search_log` | Search matching lines inside a stored `log://` payload |
|
|
286
287
|
| `get_graph` | Fetch graph bytes behind a `graph://` ref (`ImageContent`) |
|
|
287
288
|
| `get_matrix` | Fetch matrix payloads behind a `matrix://` ref |
|
|
289
|
+
| `inspect_data` | Run `describe` + `codebook` and return compact dataset metadata |
|
|
290
|
+
| `install_package` | Install an SSC or explicit `net install` package and verify it resolves |
|
|
288
291
|
| `list_sessions` | Enumerate live sessions |
|
|
289
292
|
| `cancel_session` | Cancel a session; the subprocess-backed path terminates in-flight runs and short-circuits pending ones |
|
|
290
293
|
| `reset_session` | Drop a session's data |
|
|
@@ -416,7 +419,7 @@ stata_code/
|
|
|
416
419
|
│ ├── runner.py # in-process execute(); collects everything via sfi
|
|
417
420
|
│ └── _pool.py # subprocess workers for public API / MCP hard timeouts
|
|
418
421
|
├── mcp/
|
|
419
|
-
│ └── server.py # MCP server (
|
|
422
|
+
│ └── server.py # MCP server (18 tools)
|
|
420
423
|
└── kernel/
|
|
421
424
|
└── kernel.py # Jupyter kernel
|
|
422
425
|
```
|
|
@@ -454,7 +457,7 @@ stata_code/
|
|
|
454
457
|
- Log truncation with ref store
|
|
455
458
|
- Warning extraction: 5 categories + generic notes
|
|
456
459
|
- 32-kind error taxonomy with canonical suggestions
|
|
457
|
-
- MCP server:
|
|
460
|
+
- MCP server: 18 tools, including notebook navigation / search / atomic edits, the run-bundle index (`list_runs`), log grep (`search_log`), dataset inspection (`inspect_data`), and package installation (`install_package`)
|
|
458
461
|
- Jupyter kernel: rewired to the v1.0 pipeline, kernel logos bundled
|
|
459
462
|
- Matrix size cap + `get_matrix(ref)` for large matrices (>10k cells)
|
|
460
463
|
- Subprocess-backed hard timeout and cancellation for the public Python API and MCP server: `timeout_ms`, `cancel(session_id)`, and MCP `cancel_session`
|
|
@@ -149,7 +149,7 @@ claude mcp add stata-code --scope local -- stata-code-mcp
|
|
|
149
149
|
claude mcp add stata-code --scope project -- stata-code-mcp
|
|
150
150
|
```
|
|
151
151
|
|
|
152
|
-
Then launch `claude` and type `/mcp` to confirm `stata-code` shows up with its
|
|
152
|
+
Then launch `claude` and type `/mcp` to confirm `stata-code` shows up with its 18 tools (`stata_run`, `stata_info`, `get_log`, `search_log`, `get_graph`, `get_matrix`, `inspect_data`, `install_package`, `list_sessions`, `cancel_session`, `reset_session`, `notebook_outline`, `notebook_get_cell`, `notebook_locate`, `notebook_edit_cell`, `notebook_insert_cell`, `notebook_delete_cell`, `list_runs`).
|
|
153
153
|
|
|
154
154
|
#### Error Recovery in Agent Workflows
|
|
155
155
|
|
|
@@ -237,15 +237,18 @@ If an OpenAI-backed client reports `API Error: 400 Invalid schema for function
|
|
|
237
237
|
upgrade to `stata-code>=0.6.5`, then restart the MCP client. Older server
|
|
238
238
|
processes keep advertising the stale schema until they are restarted.
|
|
239
239
|
|
|
240
|
-
The MCP server registers 15 tools:
|
|
240
|
+
The MCP server registers 18 tools:
|
|
241
241
|
|
|
242
242
|
| Tool | Purpose |
|
|
243
243
|
| --- | --- |
|
|
244
244
|
| `stata_run` | Execute Stata code and return a v1.0 RunResult JSON |
|
|
245
245
|
| `stata_info` | Report Stata edition, version, and capabilities |
|
|
246
246
|
| `get_log` | Fetch the full log behind a `log://` ref |
|
|
247
|
+
| `search_log` | Search matching lines inside a stored `log://` payload |
|
|
247
248
|
| `get_graph` | Fetch graph bytes behind a `graph://` ref (`ImageContent`) |
|
|
248
249
|
| `get_matrix` | Fetch matrix payloads behind a `matrix://` ref |
|
|
250
|
+
| `inspect_data` | Run `describe` + `codebook` and return compact dataset metadata |
|
|
251
|
+
| `install_package` | Install an SSC or explicit `net install` package and verify it resolves |
|
|
249
252
|
| `list_sessions` | Enumerate live sessions |
|
|
250
253
|
| `cancel_session` | Cancel a session; the subprocess-backed path terminates in-flight runs and short-circuits pending ones |
|
|
251
254
|
| `reset_session` | Drop a session's data |
|
|
@@ -377,7 +380,7 @@ stata_code/
|
|
|
377
380
|
│ ├── runner.py # in-process execute(); collects everything via sfi
|
|
378
381
|
│ └── _pool.py # subprocess workers for public API / MCP hard timeouts
|
|
379
382
|
├── mcp/
|
|
380
|
-
│ └── server.py # MCP server (
|
|
383
|
+
│ └── server.py # MCP server (18 tools)
|
|
381
384
|
└── kernel/
|
|
382
385
|
└── kernel.py # Jupyter kernel
|
|
383
386
|
```
|
|
@@ -415,7 +418,7 @@ stata_code/
|
|
|
415
418
|
- Log truncation with ref store
|
|
416
419
|
- Warning extraction: 5 categories + generic notes
|
|
417
420
|
- 32-kind error taxonomy with canonical suggestions
|
|
418
|
-
- MCP server:
|
|
421
|
+
- MCP server: 18 tools, including notebook navigation / search / atomic edits, the run-bundle index (`list_runs`), log grep (`search_log`), dataset inspection (`inspect_data`), and package installation (`install_package`)
|
|
419
422
|
- Jupyter kernel: rewired to the v1.0 pipeline, kernel logos bundled
|
|
420
423
|
- Matrix size cap + `get_matrix(ref)` for large matrices (>10k cells)
|
|
421
424
|
- Subprocess-backed hard timeout and cancellation for the public Python API and MCP server: `timeout_ms`, `cancel(session_id)`, and MCP `cancel_session`
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Package the ``stata-code`` skill into a single uploadable ``.zip``.
|
|
2
|
+
|
|
3
|
+
The skill (``skills/stata-code/SKILL.md`` + the ``references/`` library) is
|
|
4
|
+
consumed two ways:
|
|
5
|
+
|
|
6
|
+
* In-repo / Claude Code — read straight from ``skills/stata-code/``.
|
|
7
|
+
* Claude.ai project knowledge — uploaded as a ``.zip``. This script builds
|
|
8
|
+
that archive.
|
|
9
|
+
|
|
10
|
+
The archive contains a single top-level ``stata-code/`` folder so it extracts
|
|
11
|
+
cleanly::
|
|
12
|
+
|
|
13
|
+
stata-code/SKILL.md
|
|
14
|
+
stata-code/references/econometrics.md
|
|
15
|
+
stata-code/references/packages/reghdfe.md
|
|
16
|
+
...
|
|
17
|
+
|
|
18
|
+
Run::
|
|
19
|
+
|
|
20
|
+
python scripts/build_skill_zip.py # -> build/stata-code-skill.zip
|
|
21
|
+
python scripts/build_skill_zip.py -o /tmp/out.zip # custom destination
|
|
22
|
+
|
|
23
|
+
The build is deterministic (sorted entries, fixed timestamps) so re-running it
|
|
24
|
+
on unchanged inputs produces a byte-identical archive.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import argparse
|
|
30
|
+
import sys
|
|
31
|
+
import zipfile
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
|
|
34
|
+
# Timestamp stamped on every archive member so rebuilds are reproducible.
# 1980-01-01 is the earliest date the zip format can represent.
_FIXED_DATE_TIME = (1980, 1, 1, 0, 0, 0)

# Name of the single top-level folder inside the archive.
ARCHIVE_PREFIX = "stata-code"

# This script lives in ``<repo>/scripts/``, so the repo root is two levels up.
REPO_ROOT = Path(__file__).resolve().parent.parent

# Source tree of the skill and the default location of the built archive.
SKILL_DIR = REPO_ROOT.joinpath("skills", "stata-code")
DEFAULT_OUTPUT = REPO_ROOT.joinpath("build", "stata-code-skill.zip")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def collect_files(skill_dir: Path = SKILL_DIR) -> list[Path]:
    """Return every shippable file under ``skill_dir`` in a stable order.

    Editor/OS cruft (``.DS_Store``, ``Thumbs.db``, ``desktop.ini``) and
    anything inside a ``__pycache__`` directory is left out so the archive
    stays clean.

    Parameters
    ----------
    skill_dir : Path
        Root of the skill tree to walk recursively.

    Returns
    -------
    list[Path]
        Paths of regular files (each still prefixed with ``skill_dir``),
        ordered by their path components relative to ``skill_dir``.

    Raises
    ------
    FileNotFoundError
        If ``skill_dir`` does not exist or is not a directory.
    """
    if not skill_dir.is_dir():
        raise FileNotFoundError(f"skill directory not found: {skill_dir}")

    skip_names = {".DS_Store", "Thumbs.db", "desktop.ini"}
    keyed: list[tuple[tuple[str, ...], Path]] = []
    for path in skill_dir.rglob("*"):
        # Filter on the path *relative* to the skill root: a checkout that
        # itself lives below a directory named ``__pycache__`` must not
        # cause every file to be dropped.
        rel_parts = path.relative_to(skill_dir).parts
        if path.name in skip_names or "__pycache__" in rel_parts:
            continue
        if path.is_file():
            keyed.append((rel_parts, path))

    # Sort on plain string components rather than on ``Path`` objects:
    # ``Path`` comparison is case-insensitive on Windows, which would make
    # the entry order (and therefore the archive bytes) platform-dependent.
    keyed.sort(key=lambda item: item[0])
    return [path for _, path in keyed]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def build_zip(
    dest: Path = DEFAULT_OUTPUT,
    skill_dir: Path = SKILL_DIR,
) -> list[str]:
    """Write the skill archive to ``dest``; return the arcnames included.

    Every member is stored as ``<ARCHIVE_PREFIX>/<posix relative path>`` with
    a fixed timestamp, fixed permission bits and a fixed "created on" system
    id, in the order returned by :func:`collect_files`.

    Parameters
    ----------
    dest : Path
        Destination ``.zip``; parent directories are created as needed and an
        existing file is overwritten.
    skill_dir : Path
        Root of the skill tree to package.

    Returns
    -------
    list[str]
        Archive member names, in the order they were written.

    Raises
    ------
    FileNotFoundError
        If ``skill_dir`` is missing or contains no shippable files.
    """
    # If the destination sits inside the skill tree, a previous build's
    # output would otherwise be packed into the next archive.
    dest_resolved = dest.resolve()
    files = [p for p in collect_files(skill_dir) if p.resolve() != dest_resolved]
    if not files:
        raise FileNotFoundError(f"no skill files under {skill_dir}")

    dest.parent.mkdir(parents=True, exist_ok=True)
    arcnames: list[str] = []
    with zipfile.ZipFile(dest, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for path in files:
            rel = path.relative_to(skill_dir).as_posix()
            arcname = f"{ARCHIVE_PREFIX}/{rel}"
            info = zipfile.ZipInfo(arcname, date_time=_FIXED_DATE_TIME)
            info.compress_type = zipfile.ZIP_DEFLATED
            # ZipInfo defaults create_system to 0 on Windows and 3 (Unix)
            # elsewhere; pin it so the header bytes do not depend on the
            # build host and the Unix mode below is always honoured.
            info.create_system = 3
            info.external_attr = 0o644 << 16  # permission bits rw-r--r--
            zf.writestr(info, path.read_bytes())
            arcnames.append(arcname)
    return arcnames
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: build the archive and report the result.

    Parameters
    ----------
    argv : list[str] | None
        Argument vector to parse; ``None`` (the default) makes ``argparse``
        read ``sys.argv[1:]``.

    Returns
    -------
    int
        Process exit status: ``0`` on success, ``1`` if the archive could not
        be built (the reason is printed to stderr).
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring contains literal blocks (file tree, commands);
        # the default formatter would reflow them into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        default=DEFAULT_OUTPUT,
        help=f"Destination .zip (default: {DEFAULT_OUTPUT.relative_to(REPO_ROOT)}).",
    )
    args = parser.parse_args(argv)

    try:
        arcnames = build_zip(args.output)
        size = args.output.stat().st_size
    except OSError as exc:
        # OSError covers FileNotFoundError (missing/empty skill tree) as well
        # as PermissionError / IsADirectoryError on the destination, so every
        # filesystem failure is reported as a one-line error, not a traceback.
        print(f"error: {exc}", file=sys.stderr)
        return 1

    print(f"wrote: {args.output} ({len(arcnames)} files, {size:,} bytes)")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -218,6 +218,107 @@ def get_log(ref: str) -> dict[str, Any]:
|
|
|
218
218
|
}
|
|
219
219
|
|
|
220
220
|
|
|
221
|
+
def search_log(
    ref: str,
    pattern: str,
    *,
    is_regex: bool = False,
    ignore_case: bool = True,
    context: int = 0,
    max_matches: int = 50,
) -> dict[str, Any]:
    """Auxiliary tool: find matching lines inside a stored ``log://`` payload.

    Lets a caller inspect a long log that was returned by reference without
    fetching the whole text through :func:`get_log`.

    Parameters
    ----------
    ref : str
        A ``log://<request_id>`` ref produced by a truncated ``stata_run``.
    pattern : str
        Text to look for on each line: a plain substring by default, or a
        Python regular expression when ``is_regex=True``. Must be non-empty.
    is_regex : bool
        Compile ``pattern`` with :mod:`re`. A pattern that fails to compile
        raises :class:`ValueError` (surfaced as ``invalid_request``).
    ignore_case : bool
        Match case-insensitively (default ``True``).
    context : int
        Number of neighbouring lines to attach on each side of a match,
        clamped to the range 0-10. With 0, ``before`` / ``after`` are omitted.
    max_matches : int
        Maximum number of matches to return, clamped to the range 1-1000.
        ``truncated`` is ``True`` when at least one further match exists.

    Returns
    -------
    dict
        ``{ref, pattern, is_regex, lines_total, match_count, truncated,
        matches: [{line_no, text, before?, after?}]}`` with 1-based
        ``line_no``. ``lines_total`` is copied from the stored payload.

    Raises
    ------
    RefNotFound
        If ``ref`` does not resolve to a log payload (a dict carrying a
        string ``text`` and a ``lines_total`` key).
    ValueError
        If ``pattern`` is empty or is an invalid regular expression.
    """
    stored = _refs.get(ref)
    looks_like_log = (
        isinstance(stored, dict)
        and isinstance(stored.get("text"), str)
        and "lines_total" in stored
    )
    if not looks_like_log:
        raise RefNotFound(ref, kind="unknown_log_ref")
    if not pattern:
        raise ValueError("pattern must be a non-empty string")

    # Clamp the caller-supplied knobs into their supported ranges.
    window = min(max(int(context), 0), 10)
    limit = min(max(int(max_matches), 1), 1000)

    # Choose the per-line predicate once, outside the scan loop.
    if is_regex:
        try:
            compiled = re.compile(pattern, re.IGNORECASE if ignore_case else 0)
        except re.error as exc:
            raise ValueError(f"invalid regex: {exc}") from exc

        def is_hit(line: str) -> bool:
            return compiled.search(line) is not None

    elif ignore_case:
        lowered = pattern.lower()

        def is_hit(line: str) -> bool:
            return lowered in line.lower()

    else:

        def is_hit(line: str) -> bool:
            return pattern in line

    log_lines = stored["text"].split("\n")
    found: list[dict[str, Any]] = []
    more_exist = False
    for line_no, line in enumerate(log_lines, start=1):
        if not is_hit(line):
            continue
        if len(found) >= limit:
            # One match beyond the limit is enough to know the result is cut.
            more_exist = True
            break
        hit: dict[str, Any] = {"line_no": line_no, "text": line}
        if window:
            here = line_no - 1  # 0-based position of the matching line
            lead = log_lines[max(0, here - window):here]
            trail = log_lines[here + 1:here + 1 + window]
            if lead:
                hit["before"] = lead
            if trail:
                hit["after"] = trail
        found.append(hit)

    return {
        "ref": ref,
        "pattern": pattern,
        "is_regex": is_regex,
        "lines_total": stored["lines_total"],
        "match_count": len(found),
        "truncated": more_exist,
        "matches": found,
    }
|
|
320
|
+
|
|
321
|
+
|
|
221
322
|
def cancel(session_id: str = "main") -> bool:
|
|
222
323
|
"""Request cancellation of the next ``execute()`` call for ``session_id``.
|
|
223
324
|
|