stata-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
stata_cli/main.py ADDED
@@ -0,0 +1,343 @@
1
+ """Stata CLI - run Stata commands from the terminal."""
2
+
3
+ import json
4
+ import os
5
+ import sys
6
+
7
+ import click
8
+
9
+ from . import __version__
10
+ from .engine import StataEngine, Result
11
+ from .output_filter import apply_compact_filter, check_token_limit, clean_log_wrapper
12
+ from .utils import detect_stata_path
13
+
14
+
15
# Process exit codes returned by the CLI.
EXIT_OK = 0             # success
EXIT_STATA_ERROR = 1    # a Stata command or help lookup reported failure
EXIT_USAGE_ERROR = 2    # bad invocation (e.g. empty code, no daemon to stop)
EXIT_INIT_FAILURE = 3   # Stata not found, or the engine/daemon failed to start
20
+
21
+
22
def _exit(code: int) -> None:
    """Terminate the process immediately with *code*, skipping atexit hooks.

    PyStata registers an atexit hook that resets the exit code to 0, so the
    process is ended with ``os._exit`` instead of a normal interpreter exit.
    ``os._exit`` does not flush stdio buffers, hence the explicit flushes.
    """
    for stream in (sys.stdout, sys.stderr):
        stream.flush()
    os._exit(code)
27
+
28
+
29
@click.group()
@click.version_option(__version__, prog_name="stata-cli")
@click.option("--stata-path", envvar="STATA_PATH", default=None, help="Path to Stata installation directory.")
@click.option("--edition", type=click.Choice(["mp", "se", "be"], case_sensitive=False), default="mp", help="Stata edition.")
@click.option("--compact", is_flag=True, default=False, help="Apply compact output filter (strip verbose noise).")
@click.option("--json", "use_json", is_flag=True, default=False, help="Output results as JSON (for agent consumption).")
@click.option("--timeout", type=float, default=600.0, help="Execution timeout in seconds.")
@click.option("--max-tokens", type=int, default=0, help="Max output tokens (0=unlimited). Saves full output to file when exceeded.")
@click.option("--no-daemon", is_flag=True, default=False, help="Force direct execution, skip daemon.")
@click.option("--graphs-dir", envvar="STATA_CLI_GRAPHS_DIR", default=None, help="Graph export directory.")
@click.pass_context
def cli(ctx, stata_path, edition, compact, use_json, timeout, max_tokens, no_daemon, graphs_dir):
    """Command-line interface for Stata."""
    # Stash the global options on the context object so that every
    # subcommand can read them through ``ctx.obj``.
    ctx.ensure_object(dict)
    ctx.obj.update(
        stata_path=stata_path,
        edition=edition,
        compact=compact,
        json=use_json,
        timeout=timeout,
        max_tokens=max_tokens,
        no_daemon=no_daemon,
        graphs_dir=graphs_dir,
    )
51
+
52
+
53
def _get_engine(ctx) -> StataEngine:
    """Build a ``StataEngine`` from the options stored on *ctx*.

    The installation path comes from ``--stata-path`` / ``STATA_PATH`` when
    given, otherwise from ``detect_stata_path()``.  If no installation is
    found, or the engine constructor raises, an error is written to stderr
    and the process exits with ``EXIT_INIT_FAILURE``.
    """
    options = ctx.obj
    stata_path = options["stata_path"] or detect_stata_path()
    if not stata_path:
        for message in (
            "Error: Stata installation not found.",
            "Set --stata-path or the STATA_PATH environment variable.",
        ):
            click.echo(message, err=True)
        _exit(EXIT_INIT_FAILURE)
    try:
        return StataEngine(stata_path, options["edition"], graphs_dir=options.get("graphs_dir"))
    except Exception as exc:
        click.echo(f"Error initializing Stata: {exc}", err=True)
        _exit(EXIT_INIT_FAILURE)
65
+
66
+
67
def _try_daemon(ctx, cmd_type: str, payload: dict) -> Result | None:
    """Try to route a command through the daemon.

    Args:
        ctx: Click context whose ``obj`` dict holds the global options.
        cmd_type: Daemon command name (e.g. ``"execute"``, ``"help"``).
        payload: Arguments sent along with the command.

    Returns:
        A ``Result`` built from the daemon's response, or ``None`` when the
        daemon is disabled (``--no-daemon``), not running, unreachable, or
        any error occurs while talking to it, so that callers can fall back
        to direct execution.
    """
    if ctx.obj.get("no_daemon"):
        return None
    try:
        from .daemon import DaemonClient

        client = DaemonClient()
        if not client.is_running() or not client.connect():
            return None
        try:
            resp = client.send(cmd_type, payload)
        finally:
            # Release the connection even when send() raises; previously a
            # failed send skipped close() and leaked the connection.
            client.close()
        return Result(
            # Fall back to the "status" field when the response carries no
            # explicit "success" key.
            success=resp.get("success", resp.get("status") == "success"),
            output=resp.get("output", ""),
            error=resp.get("error", ""),
            execution_time=resp.get("execution_time", 0.0),
            return_code=resp.get("return_code", 0),
            extra=resp.get("extra", {}),
        )
    except Exception:
        # Best effort by design: any daemon problem means "use the engine".
        return None
90
+
91
+
92
def _print_result(result, compact: bool, use_json: bool = False, max_tokens: int = 0, filter_echo: bool = False) -> None:
    """Post-process and print *result*, exiting non-zero on failure.

    Non-empty output is stripped of log scaffolding, optionally compacted
    and token-limited, and written back to ``result.output``.  In JSON mode
    the whole result is printed as JSON; otherwise the output, any exported
    graphs and (on failure) the error text are printed.  A failed result
    ends the process with ``EXIT_STATA_ERROR``.
    """
    text = result.output
    if text:
        text = clean_log_wrapper(text)
        if compact:
            text = apply_compact_filter(text, filter_command_echo=filter_echo)
        if max_tokens > 0:
            text, _ = check_token_limit(text, max_tokens)
        result.output = text

    graphs = result.extra.get("graphs", []) if result.extra else []
    failed = not result.success

    if use_json:
        click.echo(result.to_json())
        if failed:
            _exit(EXIT_STATA_ERROR)
        return

    if text and text.strip():
        click.echo(text)
    for graph in graphs or []:
        click.echo(f"[graph] {graph.get('name', 'graph')}: {graph.get('path', '')}")
    if failed:
        if result.error:
            click.echo(result.error, err=True)
        _exit(EXIT_STATA_ERROR)
119
+
120
+
121
+ # ── Commands ─────────────────────────────────────────────────────────────
122
+
123
@cli.command()
@click.argument("code")
@click.pass_context
def run(ctx, code):
    """Execute a Stata code string.

    Use '-' to read code from stdin (for piping).

    \b
    Examples:
      stata-cli run "sysuse auto, clear"
      stata-cli run "display 1+1"
      echo "summarize price" | stata-cli run -
    """
    options = ctx.obj

    # A lone "-" means "take the code from standard input".
    if code == "-":
        code = sys.stdin.read()
    if not code.strip():
        click.echo("Error: empty code.", err=True)
        _exit(EXIT_USAGE_ERROR)

    # Prefer the daemon; fall back to a directly created engine.
    result = _try_daemon(ctx, "execute", {"code": code, "timeout": options["timeout"]})
    if result is None:
        result = _get_engine(ctx).run(code, timeout=options["timeout"])
    _print_result(
        result,
        options["compact"],
        use_json=options["json"],
        max_tokens=options["max_tokens"],
    )
148
+
149
+
150
@cli.command("do")
@click.argument("path", type=click.Path(exists=True))
@click.pass_context
def do_file(ctx, path):
    """Execute a Stata .do file.

    \b
    Examples:
      stata-cli do analysis.do
      stata-cli --compact do long_script.do
    """
    options = ctx.obj

    # The daemon receives an absolute path (presumably because its working
    # directory may differ from the caller's); the engine gets it as typed.
    daemon_payload = {"path": os.path.abspath(path), "timeout": options["timeout"]}
    result = _try_daemon(ctx, "execute_file", daemon_payload)
    if result is None:
        result = _get_engine(ctx).run_file(path, timeout=options["timeout"])

    # Do-file logs echo every command, so command echoes are filtered in
    # compact mode.
    _print_result(
        result,
        options["compact"],
        use_json=options["json"],
        max_tokens=options["max_tokens"],
        filter_echo=True,
    )
166
+
167
+
168
@cli.command()
@click.pass_context
def detect(ctx):
    """Print the auto-detected Stata installation path."""
    # An explicit --stata-path / STATA_PATH wins over auto-detection.
    location = ctx.obj["stata_path"] or detect_stata_path()
    if not location:
        click.echo("Stata installation not found.", err=True)
        _exit(EXIT_INIT_FAILURE)
    else:
        click.echo(location)
178
+
179
+
180
@cli.command("data")
@click.option("--if", "if_condition", default=None, help="Stata if condition for filtering.")
@click.option("--rows", type=int, default=10000, help="Maximum rows to return.")
@click.pass_context
def data_cmd(ctx, if_condition, rows):
    """View the current dataset as JSON.

    \b
    Examples:
      stata-cli data
      stata-cli data --if "price>5000" --rows 50
    """
    # Try the daemon first, unless the user asked for direct execution.
    # With --no-daemon the daemon module is no longer imported and no client
    # is constructed.
    if not ctx.obj.get("no_daemon"):
        try:
            from .daemon import DaemonClient

            client = DaemonClient()
            if client.is_running() and client.connect():
                try:
                    resp = client.send("get_data", {"if_condition": if_condition, "max_rows": rows})
                finally:
                    # Always release the connection, even if send() fails.
                    client.close()
                click.echo(json.dumps(resp, ensure_ascii=False, indent=2))
                return
        except Exception:
            # Best effort: any daemon problem falls through to the engine.
            pass

    engine = _get_engine(ctx)
    resp = engine.get_data(if_condition=if_condition, max_rows=rows)
    click.echo(json.dumps(resp, ensure_ascii=False, indent=2))
207
+
208
+
209
@cli.command("help")
@click.argument("topic")
@click.pass_context
def help_cmd(ctx, topic):
    """Display Stata help for a topic.

    \b
    Examples:
      stata-cli help regress
      stata-cli help summarize
    """
    # Prefer the daemon; fall back to a directly created engine.
    result = _try_daemon(ctx, "help", {"topic": topic})
    if result is None:
        result = _get_engine(ctx).help(topic)

    if ctx.obj["json"]:
        click.echo(result.to_json())
        return

    if result.output and result.output.strip():
        click.echo(result.output)
        return

    # Nothing printable came back: report it and signal failure.
    click.echo(f"No help found for: {topic}", err=True)
    _exit(EXIT_STATA_ERROR)
231
+
232
+
233
@cli.command("stop")
@click.pass_context
def stop_cmd(ctx):
    """Interrupt a running Stata command (daemon mode)."""
    try:
        from .daemon import DaemonClient

        client = DaemonClient()
        reachable = client.is_running() and client.connect()
        if reachable:
            resp = client.send("stop")
            client.close()
            click.echo(f"Stop signal: {resp.get('status', 'unknown')}")
            return
    except Exception:
        # Any failure while talking to the daemon is reported below as
        # "not running".
        pass
    click.echo("Daemon not running.", err=True)
    _exit(EXIT_USAGE_ERROR)
249
+
250
+
251
+ # ── Daemon subcommands ───────────────────────────────────────────────────
252
+
253
# Container group only: the start/stop/status/restart subcommands attach to it.
@cli.group()
def daemon():
    """Manage the Stata daemon process."""
256
+
257
+
258
@daemon.command("start")
@click.option("--idle-timeout", type=int, default=3600, help="Auto-shutdown after N seconds idle.")
@click.pass_context
def daemon_start(ctx, idle_timeout):
    """Start the Stata daemon (keeps PyStata alive for fast execution)."""
    options = ctx.obj
    stata_path = options["stata_path"] or detect_stata_path()
    if not stata_path:
        click.echo("Error: Stata installation not found.", err=True)
        _exit(EXIT_INIT_FAILURE)

    from .daemon import start_daemon, DaemonClient

    # Nothing to do when a daemon is already up.
    if DaemonClient().is_running():
        click.echo("Daemon already running.")
        return

    click.echo("Starting daemon...")
    started = start_daemon(
        stata_path,
        options["edition"],
        graphs_dir=options.get("graphs_dir"),
        idle_timeout=idle_timeout,
    )
    if not started:
        click.echo("Failed to start daemon.", err=True)
        _exit(EXIT_INIT_FAILURE)
    else:
        click.echo("Daemon started.")
281
+
282
+
283
@daemon.command("stop")
def daemon_stop():
    """Stop the Stata daemon."""
    from .daemon import stop_daemon, DaemonClient

    if DaemonClient().is_running():
        click.echo("Stopping daemon...")
        stop_daemon()
        click.echo("Daemon stopped.")
    else:
        click.echo("Daemon not running.")
294
+
295
+
296
@daemon.command("status")
def daemon_status_cmd():
    """Show daemon status."""
    from .daemon import daemon_status

    status = daemon_status()
    if not status:
        click.echo("Daemon not running.")
        return

    # Missing fields are shown as "?" (text) or 0 (durations).
    running_for = status.get("uptime", 0)
    idle_for = status.get("idle", 0)
    click.echo(f"Daemon running (PID {status.get('pid', '?')})")
    click.echo(f" Stata: {status.get('stata_path', '?')} ({status.get('edition', '?')})")
    click.echo(f" Uptime: {int(running_for)}s")
    click.echo(f" Idle: {int(idle_for)}s")
310
+
311
+
312
@daemon.command("restart")
@click.option("--idle-timeout", type=int, default=3600, help="Auto-shutdown after N seconds idle.")
@click.pass_context
def daemon_restart(ctx, idle_timeout):
    """Restart the Stata daemon."""
    # Locate Stata *before* touching the running daemon.  Previously the
    # daemon was stopped first, so a failed lookup left the user with no
    # daemon at all instead of the one that had been working.
    stata_path = ctx.obj["stata_path"] or detect_stata_path()
    if not stata_path:
        click.echo("Error: Stata installation not found.", err=True)
        _exit(EXIT_INIT_FAILURE)

    from .daemon import stop_daemon, start_daemon, DaemonClient

    client = DaemonClient()
    if client.is_running():
        click.echo("Stopping daemon...")
        stop_daemon()

    click.echo("Starting daemon...")
    ok = start_daemon(stata_path, ctx.obj["edition"], graphs_dir=ctx.obj.get("graphs_dir"), idle_timeout=idle_timeout)
    if ok:
        click.echo("Daemon restarted.")
    else:
        click.echo("Failed to restart daemon.", err=True)
        _exit(EXIT_INIT_FAILURE)
335
+
336
+
337
+ # Allow running as `python -m stata_cli`
338
def main():
    """Entry point: hand control to the click command group."""
    cli()


# Also allow executing this module directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,239 @@
1
+ """Output filtering for Stata CLI.
2
+
3
+ Provides compact-mode filtering (strips verbose/redundant output) and
4
+ cleanup of the log-file wrapper lines injected by the engine.
5
+ """
6
+
7
+ import os
8
+ import re
9
+ import time
10
+ import tempfile
11
+
12
+
13
# A line made only of 40+ dashes marks the end of the startup banner.
_BANNER_END_RE = re.compile(r"^-{40,}$")

# Lines produced by the ``log using`` / ``log close`` wrapper commands.
_LOG_SCAFFOLD_PATTERNS = [
    re.compile(r"^\s*\.?\s*capture\s+log\s+close", re.IGNORECASE),
    re.compile(r"^\s*\.?\s*log\s+using\s+", re.IGNORECASE),
    re.compile(r"^\s*(name|log|log type|opened on|closed on):", re.IGNORECASE),
    # Continuation of a long log-using path that wraps to the next line
    re.compile(r"^>\s.*\.log"),
]


def clean_log_wrapper(output: str) -> str:
    """Remove the Stata banner and ``log using`` / ``log close`` scaffolding.

    Everything up to and including the first dashed separator line is
    dropped, then every log-scaffolding line, and finally any blank lines
    at the start and end of what remains.  Empty input is returned as is.
    """
    if not output:
        return output

    rows = output.split("\n")

    # Index of the first line after the banner separator (0 if there is none).
    body_start = next(
        (idx + 1 for idx, row in enumerate(rows) if _BANNER_END_RE.match(row.strip())),
        0,
    )

    kept: list[str] = []
    for row in rows[body_start:]:
        stripped = row.strip()
        if not any(pattern.match(stripped) for pattern in _LOG_SCAFFOLD_PATTERNS):
            kept.append(row)

    # Trim blank lines from both ends by narrowing a [first, last) window.
    first, last = 0, len(kept)
    while first < last and not kept[first].strip():
        first += 1
    while last > first and not kept[last - 1].strip():
        last -= 1
    return "\n".join(kept[first:last])
48
+
49
+
50
def apply_compact_filter(output: str, filter_command_echo: bool = False) -> str:
    """Strip verbose or redundant Stata output to reduce noise.

    Always removed:
    - program definition blocks (and ``capture program drop`` lines)
    - Mata blocks
    - loop code echoes (output produced inside the loop is kept)
    - SMCL formatting tags
    - messages such as "(N real changes made)"

    Removed only when *filter_command_echo* is True (e.g. for ``do`` files):
    - command echo lines (``". "`` prefix) and numbered lines
    - line continuations (``"> "``)

    Runs of blank lines are collapsed to one and trailing blank lines are
    dropped.  Empty input is returned unchanged.
    """
    if not output:
        return output

    lines = output.replace("\r\n", "\n").replace("\r", "\n").split("\n")

    echo_re = re.compile(r"^\.\s*$|^\.\s+\S")
    numbered_re = re.compile(r"^\s*\d+\.\s")
    continuation_re = re.compile(r"^>\s")

    prog_drop_re = re.compile(
        r"^\s*\.?\s*(capture\s+program\s+drop|cap\s+program\s+drop|cap\s+prog\s+drop)\s+\w+",
        re.IGNORECASE,
    )
    prog_define_re = re.compile(
        r"^\s*\.?\s*program\s+(define\s+)?(?!version|dir|drop|list|describe)\w+",
        re.IGNORECASE,
    )
    mata_open_re = re.compile(
        r"^\s*(\d+\.)?\s*\.?\s*mata\s*:?\s*$|^-+\s*mata\s*\(",
        re.IGNORECASE,
    )
    block_end_re = re.compile(r"^\s*(\d+\.)?\s*[.:]*\s*end\s*$", re.IGNORECASE)
    mata_rule_re = re.compile(r"^-{20,}$")

    loop_open_re = re.compile(
        r"^(\s*\d+\.)?\s*\.?\s*(foreach|forvalues|while)\s+.*\{\s*$",
        re.IGNORECASE,
    )
    loop_close_re = re.compile(r"^\s*\d+\.\s*\}\s*$")

    changes_re = re.compile(r"^\s*\([\d,]+\s+real\s+changes?\s+made\)\s*$", re.IGNORECASE)
    missing_re = re.compile(r"^\s*\([\d,]+\s+missing\s+values?\s+generated\)\s*$", re.IGNORECASE)
    smcl_re = re.compile(
        r"\{(txt|res|err|inp|com|bf|it|sf|hline|c\s+\||\-+|break|col\s+\d+|right|center|ul|/ul)\}"
    )

    def is_echo(text: str) -> bool:
        # Command echo, numbered block line, or wrapped-command continuation.
        return bool(echo_re.match(text) or numbered_re.match(text) or continuation_re.match(text))

    def is_noise(text: str) -> bool:
        # "(N real changes made)" / "(N missing values generated)" notes.
        return bool(changes_re.match(text) or missing_re.match(text))

    # At most one kind of block is open at a time, so a single state variable
    # plus one nesting counter (reset on entry) is enough.
    state = None  # None, "program", "mata" or "loop"
    nesting = 0
    skip_next = False
    total = len(lines)
    kept: list[str] = []

    for idx, line in enumerate(lines):
        if skip_next:
            skip_next = False
            continue

        if state == "program":
            # Everything inside a program definition is dropped; a nested
            # Mata block brings its own "end", which must not close the
            # program.
            if mata_open_re.match(line):
                nesting += 1
            if block_end_re.match(line):
                if nesting > 0:
                    nesting -= 1
                else:
                    state = None
            continue

        if state == "mata":
            if block_end_re.match(line):
                state = None
                # Also swallow the dashed rule that follows the Mata "end".
                if idx + 1 < total and mata_rule_re.match(lines[idx + 1]):
                    skip_next = True
            continue

        if state == "loop":
            if loop_open_re.match(line):
                nesting += 1
            elif loop_close_re.match(line):
                if nesting > 0:
                    nesting -= 1
                else:
                    state = None
            elif not (is_echo(line) or is_noise(line)):
                # Real output produced inside the loop: keep it when non-blank.
                cleaned = smcl_re.sub("", line)
                if cleaned.strip():
                    kept.append(cleaned)
            continue

        if loop_open_re.match(line):
            state, nesting = "loop", 0
            continue
        if prog_drop_re.match(line):
            continue
        if prog_define_re.match(line):
            state, nesting = "program", 0
            continue
        if mata_open_re.match(line):
            state = "mata"
            continue
        if is_noise(line):
            continue
        if filter_command_echo and is_echo(line):
            continue

        kept.append(smcl_re.sub("", line))

    # Collapse consecutive blank lines into one.
    collapsed: list[str] = []
    previous_blank = False
    for line in kept:
        blank = not line.strip()
        if not (blank and previous_blank):
            collapsed.append(line)
        previous_blank = blank

    while collapsed and not collapsed[-1].strip():
        collapsed.pop()

    return "\n".join(collapsed)
204
+
205
+
206
def check_token_limit(output: str, max_tokens: int) -> tuple[str, bool]:
    """Truncate output exceeding *max_tokens* (~4 chars/token).

    Args:
        output: Text to check.
        max_tokens: Token budget; ``0`` or a negative value disables the
            check.

    Returns:
        ``(output, was_truncated)``.  When the estimate exceeds the budget,
        the full output is saved to a uniquely named file under
        ``<tempdir>/stata_cli_logs`` and a summary with the file path and a
        short preview is returned.  If the file cannot be created or
        written, the output is instead cut at ``max_tokens * 4`` characters.
    """
    if max_tokens <= 0 or not output:
        return output, False

    estimated_tokens = len(output) / 4
    if estimated_tokens <= max_tokens:
        return output, False

    max_chars = max_tokens * 4

    try:
        # Directory creation is inside the try so that an unwritable temp
        # directory also takes the truncation fallback instead of crashing.
        logs_dir = os.path.join(tempfile.gettempdir(), "stata_cli_logs")
        os.makedirs(logs_dir, exist_ok=True)
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        # mkstemp appends a random component, so two invocations within the
        # same second no longer overwrite each other's log file.
        fd, log_path = tempfile.mkstemp(
            prefix=f"stata_output_{timestamp}_", suffix=".log", dir=logs_dir
        )
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            fh.write(output)
    except OSError:
        return output[:max_chars] + f"\n\n... [Output truncated at {max_tokens} tokens]", True

    # Cap the preview at the budget so that a small limit cannot yield a
    # "truncated" message longer than the limit allows.  The output is known
    # to be longer than max_chars here, so the preview is always partial.
    preview = output[:min(1000, max_chars)] + "\n... [truncated]"
    msg = (
        f"Output exceeded token limit ({int(estimated_tokens)} tokens > {max_tokens} max).\n"
        f"Full output saved to: {log_path}\n\n"
        f"--- Preview ---\n{preview}"
    )
    return msg, True
@@ -0,0 +1,93 @@
1
+ """Simplified SMCL-to-plain-text converter for Stata help files."""
2
+
3
+ import re
4
+
5
# Named ``{c CODE}`` character codes and the text each one renders as.
_CHAR_CODES = {
    "S|": "$", "'g": "`", "-(": "{", ")-": "}",
    "-": "─", "|": "│", "+": "┼",
    "TT": "┬", "BT": "┴", "LT": "├", "RT": "┤",
    "TLC": "┌", "TRC": "┐", "BRC": "┘", "BLC": "└",
    "a'": "á", "e'": "é", "i'": "í", "o'": "ó", "u'": "ú",
    "n~": "ñ", "ss": "ß", "c,": "ç",
}


def _resolve_char(code: str) -> str:
    """Return the character for a ``{c CODE}`` argument.

    *code* may be a named code from ``_CHAR_CODES``, a hexadecimal code
    point (``0x41``) or a decimal code point (``65``).  Anything that cannot
    be resolved is returned unchanged (after stripping whitespace).
    """
    code = code.strip()
    if code in _CHAR_CODES:
        return _CHAR_CODES[code]
    try:
        if code[:2].lower() == "0x":
            n = int(code[2:], 16)
        else:
            n = int(code)
    except ValueError:
        return code
    # NUL and lone surrogates are rejected on both the hex and the decimal
    # path: surrogates cannot be encoded when the text is printed, and NUL
    # is never meaningful in help text.
    if 1 <= n <= 0x10FFFF and not 0xD800 <= n <= 0xDFFF:
        return chr(n)
    return code


# Tags that are simply stripped (content kept)
_STRIP_TAGS = re.compile(
    r"\{/?(?:txt|res|err|inp|com|bf|it|sf|ul|smcl|s6hlp|"
    r"p_end|pstd|phang|pmore|pin|p2colset[^}]*|p2col[^}]*|"
    r"marker[^}]*|dlgtab[^}]*|synoptset[^}]*|syntab[^}]*|"
    r"synopt[^}]*|synopthdr[^}]*|"
    r"col\s+\d+|right|center|break|reset|"
    r"bind\s+[^}]*)\}"
)

# {hline} or {hline N} -> dashes
_HLINE_RE = re.compile(r"\{hline(?:\s+(\d+))?\}")

# {help topic}, {help topic:text}, {manhelp topic section}
_HELP_RE = re.compile(r"\{(?:help|manhelp)\s+([^}:]+?)(?::([^}]+))?\}")

# {browse "url":text} or {browse "url"}
_BROWSE_RE = re.compile(r'\{browse\s+"([^"]*)"(?::([^}]+))?\}')

# {cmd:text}, {opt:text}, {hi:text}, {title:text}, {input:text}, {stata:text}
_STYLED_RE = re.compile(r"\{(?:cmd|opt|hi|title|input|stata)\s*:\s*([^}]*)\}")
# {it:text}, {bf:text}, {ul:text}
_STYLED2_RE = re.compile(r"\{(?:it|bf|ul)\s*:\s*([^}]*)\}")

# {c CODE}
_CHAR_RE = re.compile(r"\{c\s+([^}]+)\}")

# {space N}
_SPACE_RE = re.compile(r"\{space\s+(\d+)\}")

# Catch-all: any remaining {tag ...} or {tag:...}
_CATCHALL_RE = re.compile(r"\{[a-zA-Z_][^}]*\}")

# {c CODE} and the catch-all combined, for a single final pass: text that a
# character code expands to is never re-scanned as a tag.
_CHAR_OR_TAG_RE = re.compile(r"\{c\s+([^}]+)\}|\{[a-zA-Z_][^}]*\}")

# SMCL header line
_SMCL_HEADER_RE = re.compile(r"^\{smcl\}\s*$", re.MULTILINE)

# Star-bang lines in starbang output
_STARBANG_RE = re.compile(r"^\*!\s?", re.MULTILINE)

# INCLUDE directives (Stata-internal cross-references)
_INCLUDE_RE = re.compile(r"^INCLUDE\s+help\s+\S+.*$", re.MULTILINE)


def smcl_to_text(raw: str) -> str:
    """Convert SMCL markup to readable plain text.

    Header, star-bang and INCLUDE lines are dropped; rules, links and styled
    spans are reduced to their text; ``{c CODE}`` directives become the
    characters they name; all other tags are removed.  Runs of blank lines
    are collapsed and the result ends with exactly one newline.
    """
    text = _SMCL_HEADER_RE.sub("", raw)
    text = _STARBANG_RE.sub("", text)
    text = _INCLUDE_RE.sub("", text)

    text = _HLINE_RE.sub(lambda m: "-" * int(m.group(1) or 78), text)
    text = _HELP_RE.sub(lambda m: m.group(2) or m.group(1), text)
    text = _BROWSE_RE.sub(lambda m: m.group(2) or m.group(1), text)
    text = _STYLED_RE.sub(r"\1", text)
    text = _STYLED2_RE.sub(r"\1", text)
    text = _SPACE_RE.sub(lambda m: " " * int(m.group(1)), text)
    text = _STRIP_TAGS.sub("", text)

    # Resolve character codes and strip leftover tags in ONE pass.  When the
    # codes were resolved first, the literal braces from ``{c -(}`` and
    # ``{c )-}`` were later mistaken for a tag, so "{c -(}varlist{c )-}" came
    # out as empty text instead of "{varlist}".
    text = _CHAR_OR_TAG_RE.sub(
        lambda m: _resolve_char(m.group(1)) if m.group(1) is not None else "",
        text,
    )

    # Collapse runs of >2 blank lines
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip() + "\n"