stata-code 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,447 @@
1
+ """Stata _rc → ErrorKind mapping and canonical remediation suggestion seeds.
2
+
3
+ The mapping table here is deliberately living code, not part of the normative
4
+ SCHEMA.md. New rc codes default to ErrorKind.UNKNOWN; we tighten the table over
5
+ time as we encounter real-world failures.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import difflib
11
+
12
+ from stata_code.core.schema import ErrorKind, Suggestion
13
+
14
+ # ─────────────────────────────────────────────────────────────────────────────
15
+ # Stata _rc → ErrorKind
16
+ # ─────────────────────────────────────────────────────────────────────────────
17
+
18
# Keys are Stata return codes (`_rc`); values are the ErrorKind each code is
# classified as. Codes that do not appear here fall back to ErrorKind.UNKNOWN
# at lookup time, so adding an entry only ever tightens classification.
RC_TO_KIND: dict[int, ErrorKind] = {
    # Syntax family (parser-level rejection)
    9: ErrorKind.SYNTAX,
    100: ErrorKind.SYNTAX,
    101: ErrorKind.SYNTAX,
    102: ErrorKind.SYNTAX,
    103: ErrorKind.SYNTAX,
    121: ErrorKind.SYNTAX,
    130: ErrorKind.SYNTAX,
    132: ErrorKind.SYNTAX,
    197: ErrorKind.SYNTAX,
    198: ErrorKind.SYNTAX,
    # Command resolution
    199: ErrorKind.COMMAND_NOT_FOUND,
    # Varname / name
    111: ErrorKind.VARNAME_NOT_FOUND,
    122: ErrorKind.INVALID_NAME,
    123: ErrorKind.INVALID_NAME,
    110: ErrorKind.NAME_CONFLICT,
    # Types
    109: ErrorKind.TYPE_MISMATCH,
    408: ErrorKind.TYPE_MISMATCH,
    # Sorting
    # r(5) is the code Stata itself raises with the message "not sorted"
    # (e.g. `by x: ...` on unsorted data); without it the most common sort
    # failure would be classified as UNKNOWN.
    5: ErrorKind.NOT_SORTED,
    # NOTE(review): 119 / 459 are kept as originally mapped — confirm against
    # the Stata return-code list that they belong in this family.
    119: ErrorKind.NOT_SORTED,
    459: ErrorKind.NOT_SORTED,
    # Estimation / convergence
    430: ErrorKind.CONVERGENCE,
    491: ErrorKind.INFEASIBLE,
    301: ErrorKind.NO_ESTIMATION_RESULTS,
    1400: ErrorKind.ESTIMATION_SAMPLE_EMPTY,
    1401: ErrorKind.ESTIMATION_FAILURE,
    1402: ErrorKind.ESTIMATION_FAILURE,
    # Observations
    2000: ErrorKind.NO_OBSERVATIONS,
    2001: ErrorKind.NO_OBSERVATIONS,
    # Data state
    4: ErrorKind.DATA_IN_MEMORY,
    # Matrix
    503: ErrorKind.MATRIX_CONFORMABILITY,
    507: ErrorKind.MATRIX_CONFORMABILITY,
    504: ErrorKind.MATRIX_MISSING,
    506: ErrorKind.MATRIX_SINGULAR,
    508: ErrorKind.MATRIX_SINGULAR,
    # Files
    322: ErrorKind.FILE_NOT_FOUND,
    601: ErrorKind.FILE_NOT_FOUND,
    602: ErrorKind.FILE_EXISTS,
    603: ErrorKind.FILE_IO,
    604: ErrorKind.FILE_CORRUPT,
    610: ErrorKind.FILE_CORRUPT,
    # Network
    691: ErrorKind.NETWORK,
    692: ErrorKind.NETWORK,
    693: ErrorKind.NETWORK,
    # Permission / encoding
    608: ErrorKind.PERMISSION,
    615: ErrorKind.ENCODING,
    616: ErrorKind.ENCODING,
    # Memory / Stata limits
    901: ErrorKind.STATA_LIMIT,
    902: ErrorKind.STATA_LIMIT,
    903: ErrorKind.STATA_LIMIT,
    480: ErrorKind.OUT_OF_MEMORY,
    909: ErrorKind.OUT_OF_MEMORY,
    # Interrupt
    1: ErrorKind.INTERRUPT,
}
85
+
86
+ # Synthetic codes — the producer (not Stata) sets these.
87
# Codes in this table are assigned by the producer rather than reported by
# Stata. They are all negative — presumably so they cannot collide with a
# genuine Stata return code (TODO confirm with the runner).
SYNTHETIC_RC_TO_KIND: dict[int, ErrorKind] = {
    -1: ErrorKind.ADAPTER_CRASH,
    -2: ErrorKind.TIMEOUT,
    -3: ErrorKind.CANCELLED,
}
92
+
93
+
94
def classify_rc(rc: int) -> ErrorKind:
    """Return the `ErrorKind` for a Stata `_rc` or a producer-set synthetic code.

    The synthetic table is consulted first. A code found in neither table is
    reported as `ErrorKind.UNKNOWN`.
    """
    try:
        return SYNTHETIC_RC_TO_KIND[rc]
    except KeyError:
        # Not a synthetic code: fall through to the real Stata table.
        return RC_TO_KIND.get(rc, ErrorKind.UNKNOWN)
99
+
100
+
101
+ # ─────────────────────────────────────────────────────────────────────────────
102
+ # Curated catalog of common Stata commands for fuzzy "did you mean" matching
103
+ # on rc 199 (command_not_found). Kept as a module constant so it's cheap to
104
+ # import and easy to extend. This is intentionally not exhaustive — it covers
105
+ # the high-traffic commands an agent is most likely to mistype.
106
+ # ─────────────────────────────────────────────────────────────────────────────
107
+
108
# Each category is written as one whitespace-separated string and unpacked
# into the tuple, so adding a command is a one-word edit. Order is significant
# only in that it is preserved exactly as listed.
COMMON_STATA_COMMANDS: tuple[str, ...] = (
    # Estimation
    *"regress logit probit areg ivregress reghdfe xtreg xtivreg".split(),
    # Summary / display
    *"summarize tabulate tabstat table list describe codebook".split(),
    # Data manipulation
    *"generate replace drop keep sort gsort".split(),
    *"by bysort merge append save use sysuse".split(),
    *"import export encode decode recode label".split(),
    *"rename reshape collapse egen".split(),
    # Postestimation
    *"predict estimates margins test testparm lincom nlcom".split(),
    # Programming primitives
    *"mat matrix scalar local global".split(),
    *"di display set clear exit do run".split(),
    *"capture quietly noisily".split(),
    *"foreach forvalues while if else".split(),
    *"program return ereturn".split(),
    *"postutil post postclose".split(),
    *"putexcel putdocx file".split(),
    # Logging / I/O / shell
    *"log cmdlog cd pwd mkdir dir ls".split(),
    *"cap qui noi".split(),
    # Versions / help / packages
    *"version which ssc net search help findit".split(),
    *"view browse edit".split(),
    # Time-series / panel setup
    *"tsset xtset stset".split(),
)
140
+
141
+
142
+ # ─────────────────────────────────────────────────────────────────────────────
143
+ # Canonical remediation suggestion seeds
144
+ # ─────────────────────────────────────────────────────────────────────────────
145
+
146
+
147
def suggestions_for(
    kind: ErrorKind,
    *,
    varname: str | None = None,
    name: str | None = None,
    command: str | None = None,
    path: str | None = None,
    available_varnames: list[str] | None = None,
) -> list[Suggestion]:
    """Return the canonical remediation hints for ``kind``.

    Best-effort: a kind with no canonical hint yields an empty list. Exactly
    one branch applies per call; kinds are tested in a fixed order.

    Parameters
    ----------
    kind : ErrorKind
        The classified error kind.
    varname : str, optional
        The unknown variable name; forwarded to the VARNAME_NOT_FOUND helper.
    name : str, optional
        The clashing name; interpolated into the NAME_CONFLICT hint.
    command : str, optional
        The unrecognized command; forwarded to the COMMAND_NOT_FOUND helper
        for fuzzy matching.
    path : str, optional
        The offending file path; used for FILE_NOT_FOUND and FILE_EXISTS.
    available_varnames : list[str], optional
        Candidate variable names for the VARNAME_NOT_FOUND fuzzy match.

    Returns
    -------
    list[Suggestion]
        A freshly built list of zero or more suggestions.
    """
    if kind == ErrorKind.VARNAME_NOT_FOUND:
        return [*_varname_suggestions(varname, available_varnames)]

    if kind == ErrorKind.COMMAND_NOT_FOUND:
        return [*_command_suggestions(command)]

    if kind == ErrorKind.NAME_CONFLICT:
        if not name:
            # Nothing to interpolate: fall back to a generic wording.
            hint = (
                "the name already exists. "
                "If overwriting is intended, use the `replace` option."
            )
            return [Suggestion(action=hint)]
        hint = (
            f"`{name}` already exists. "
            f"Use `replace {name} = ...` to overwrite, "
            f"or `drop {name}` first."
        )
        return [Suggestion(action=hint, command=f"drop {name}")]

    if kind == ErrorKind.NOT_SORTED:
        hint = (
            "Data must be sorted before this command. "
            "Run `sort <by-vars>` first."
        )
        return [Suggestion(action=hint, command="sort")]

    if kind == ErrorKind.DATA_IN_MEMORY:
        hint = (
            "Data in memory would be lost. "
            "Use `clear` to discard, or save first."
        )
        return [Suggestion(action=hint, command="clear")]

    if kind == ErrorKind.NO_ESTIMATION_RESULTS:
        hint = (
            "No prior estimation results. "
            "Run an estimation command (e.g., `regress`) before "
            "`predict` / `margins`."
        )
        return [Suggestion(action=hint)]

    if kind == ErrorKind.FILE_NOT_FOUND:
        return [*_file_not_found_suggestions(path)]

    if kind == ErrorKind.FILE_EXISTS:
        subject = f"`{path}`" if path else "the target file"
        hint = (
            f"{subject} already exists. "
            "Pass the `replace` option to overwrite."
        )
        return [Suggestion(action=hint)]

    if kind == ErrorKind.STATA_LIMIT:
        hint = (
            "Stata edition / matsize limit reached. "
            "Try `set maxvar` / `set matsize`, or upgrade Stata edition."
        )
        return [Suggestion(action=hint)]

    if kind == ErrorKind.OUT_OF_MEMORY:
        hint = (
            "Out of memory. Try `compress` to shrink storage types, "
            "drop unneeded vars/obs (`keep var*` / `keep if ...`), "
            "or `set memory` (Stata 12 and earlier). "
            "Upgrading Stata edition (SE → MP) raises the ceiling."
        )
        return [Suggestion(action=hint, command="compress")]

    if kind == ErrorKind.MATRIX_SINGULAR:
        hint = (
            "Matrix is singular or not positive definite. "
            "Check for collinear regressors with `corr` or `vif` "
            "after `regress`. If a constant-free model is intended, "
            "the `noconst` option may help."
        )
        return [Suggestion(action=hint)]

    if kind == ErrorKind.MATRIX_CONFORMABILITY:
        hint = (
            "Matrices are not conformable. "
            "Verify operand shapes with `rowsof()` and `colsof()`."
        )
        return [Suggestion(action=hint)]

    if kind == ErrorKind.NO_OBSERVATIONS:
        hint = (
            "No observations match the specified `if`/`in` "
            "criteria. Use `count if <conditions>` to debug, "
            "or drop the `if`/`in` clause to widen the sample."
        )
        return [Suggestion(action=hint, command="count")]

    if kind == ErrorKind.ESTIMATION_SAMPLE_EMPTY:
        hint = (
            "Estimation sample is empty after applying "
            "`if`/`in`/missing-data exclusions. "
            "Use `count if <conditions>` to debug, and inspect "
            "missingness with `misstable summarize`."
        )
        return [Suggestion(action=hint, command="count")]

    if kind == ErrorKind.CONVERGENCE:
        hint = (
            "Optimizer did not converge. Try increasing "
            "`iterate(50)` or relaxing `nrtolerance(1e-5)`. "
            "An alternate algorithm via `technique(bfgs)` "
            "(or `nr` / `dfp`) sometimes helps."
        )
        return [Suggestion(action=hint)]

    # No canonical hint for this kind.
    return []
341
+
342
+
343
+ # ─────────────────────────────────────────────────────────────────────────────
344
+ # Internal helpers
345
+ # ─────────────────────────────────────────────────────────────────────────────
346
+
347
+
348
def _varname_suggestions(
    varname: str | None,
    available_varnames: list[str] | None,
) -> list[Suggestion]:
    """Build the suggestions for a variable-not-found error.

    Parameters
    ----------
    varname : str or None
        The variable name Stata rejected. ``None`` and the empty string are
        both treated as "name unknown", so a failed parse never produces a
        hint that quotes an empty name.
    available_varnames : list[str] or None
        Candidate names to fuzzy-match against; may be ``None`` or empty.

    Returns
    -------
    list[Suggestion]
        * name unknown: a single generic `describe` hint;
        * close matches found (at most 3, similarity cutoff 0.6): one
          "Did you mean" suggestion per match, best match first;
        * otherwise: a single hint naming the missing variable.
    """
    if not varname:
        return [
            Suggestion(
                action="Run `describe` to list variables in memory.",
                command="describe",
            )
        ]

    if available_varnames:
        matches = difflib.get_close_matches(
            varname, available_varnames, n=3, cutoff=0.6
        )
        if matches:
            return [
                Suggestion(
                    action=(
                        f"Did you mean `{cand}`? "
                        f"`{varname}` is not in the current dataset."
                    ),
                    command="describe",
                )
                for cand in matches
            ]

    # No candidates supplied, or none close enough: generic fallback.
    return [
        Suggestion(
            action=(
                f"`{varname}` is not in the current dataset. "
                "Run `describe` to list available variables."
            ),
            command="describe",
        )
    ]
389
+
390
+
391
def _command_suggestions(command: str | None) -> list[Suggestion]:
    """Build command-not-found suggestions: fuzzy matches, then an ssc/net hint.

    Parameters
    ----------
    command : str or None
        The command Stata did not recognize. ``None``, empty and
        whitespace-only values skip fuzzy matching.

    Returns
    -------
    list[Suggestion]
        Up to three "Did you mean" suggestions (similarity cutoff 0.65, best
        match first), always followed by the ssc/net install hint so agents
        know where community-contributed packages come from.

    Notes
    -----
    * The query is lower-cased before matching because every catalog entry is
      lowercase; otherwise an all-caps typo such as ``REGRESS`` shares no
      characters with any candidate and gets no suggestion.
    * A candidate identical to what was typed is dropped. The command can be
      spelled exactly like a catalog entry and still be unrecognized (for
      instance a package that is not installed); echoing it back as
      "Did you mean ...?" would only invite retrying the same failing command.
    """
    out: list[Suggestion] = []

    query = command.strip() if command else ""
    if query:
        # Ask for one extra match so that discarding a self-match can still
        # leave three real alternatives.
        matches = difflib.get_close_matches(
            query.lower(), COMMON_STATA_COMMANDS, n=4, cutoff=0.65
        )
        alternatives = [cand for cand in matches if cand != query][:3]
        out.extend(
            Suggestion(action=f"Did you mean `{cand}`?", command=cand)
            for cand in alternatives
        )

    out.append(
        Suggestion(
            action=(
                "Command not recognized. "
                "If it is a community-contributed package, "
                "try `ssc install <name>` or `net install <name>`."
            ),
        )
    )
    return out
420
+
421
+
422
def _file_not_found_suggestions(path: str | None) -> list[Suggestion]:
    """Build file-not-found suggestions: a `pwd` hint plus an optional extension hint.

    Parameters
    ----------
    path : str or None
        The path Stata could not open, if it was parsed from the error.

    Returns
    -------
    list[Suggestion]
        Always starts with a "verify the path / working directory" hint. A
        second hint proposing ``.dta`` / ``.do`` is added when the final
        component of ``path`` has no extension.

    Notes
    -----
    The extension test looks only at the last path component. Checking the
    whole string for ``"."`` misses the hint for paths such as ``./data/foo``,
    ``../foo`` or ``C:\\Users\\john.doe\\foo``, whose dots sit in the directory
    part, and wrongly fires for a bare directory such as ``data/``.
    """
    # `ntpath` splits on both "/" and "\", so one code path covers the POSIX-
    # and Windows-style paths Stata accepts, regardless of the host platform.
    import ntpath

    target = f"`{path}`" if path else "the requested file"
    out: list[Suggestion] = [
        Suggestion(
            action=(
                f"{target} not found. "
                "Verify the path and the current working directory "
                "(`pwd`, `ls`)."
            ),
            command="pwd",
        )
    ]

    if path:
        basename = ntpath.basename(path)
        # An empty basename (trailing separator) or "." / ".." denotes a
        # directory, for which appending an extension makes no sense.
        names_a_file = basename not in ("", ".", "..")
        # splitext ignores leading dots, so ".hidden" counts as extension-less.
        if names_a_file and not ntpath.splitext(basename)[1]:
            out.append(
                Suggestion(
                    action=(
                        f"`{path}` has no file extension. "
                        f"If you meant a Stata dataset, try `{path}.dta`. "
                        f"If you meant a do-file, try `{path}.do`."
                    ),
                )
            )
    return out