systemgmmkit 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,80 @@
1
+ """Generic panel-data workflow helpers for FE, RE, IV/2SLS, and Difference/System GMM."""
2
+
3
+ from .diagnostics import DiagnosticCheck, DiagnosticReport, assess_diagnostics
4
+ from .fixed_effects import (
5
+ FixedEffectsResult,
6
+ FixedEffectsSpec,
7
+ run_fixed_effects,
8
+ run_fixed_effects_native,
9
+ )
10
+ from .native_gmm import NativeGMMResult, run_native_dynamic_panel_gmm
11
+ from .panel_iv import PanelIVResult, PanelIVSpec, run_panel_2sls
12
+ from .parity import stata_xtabond2_command, stata_xtreg_fe_command, write_stata_parity_do_file
13
+ from .presets import (
14
+ build_difference_gmm_spec,
15
+ build_dynamic_panel_gmm_spec,
16
+ build_fixed_effects_spec,
17
+ build_panel_model_suite,
18
+ build_system_gmm_spec,
19
+ )
20
+ from .pydynpd_backend import PydynpdGMMResult, build_pydynpd_command, run_pydynpd
21
+ from .random_effects import RandomEffectsResult, RandomEffectsSpec, run_random_effects
22
+ from .reporting import model_card_markdown
23
+ from .spec import DynamicPanelSpec, GMMStyle, IVStyle
24
+ from .suite import PanelModelSuite, PanelModelSuiteResult, run_panel_model_suite
25
+ from .tables import combine_result_frames, export_regression_table, result_to_frame
26
+ from .validation import PanelValidationReport, validate_panel
27
+
28
# Public API of the package, kept in ASCII-sorted order (classes first, then
# functions) so additions have one obvious place to go.
__all__ = [
    "DiagnosticCheck",
    "DiagnosticReport",
    "DynamicPanelBackendError",
    "DynamicPanelSpec",
    "FixedEffectsResult",
    "FixedEffectsSpec",
    "GMMStyle",
    "IVStyle",
    "NativeGMMResult",
    "PanelIVResult",
    "PanelIVSpec",
    "PanelModelSuite",
    "PanelModelSuiteResult",
    "PanelValidationReport",
    "PydynpdGMMResult",
    "RandomEffectsResult",
    "RandomEffectsSpec",
    "assess_diagnostics",
    "build_difference_gmm_spec",
    "build_dynamic_panel_gmm_spec",
    "build_fixed_effects_spec",
    "build_panel_model_suite",
    "build_pydynpd_command",
    "build_system_gmm_spec",
    "combine_result_frames",
    "export_regression_table",
    "model_card_markdown",
    "result_to_frame",
    "run_difference_gmm",
    "run_dynamic_panel_gmm",
    "run_fixed_effects",
    "run_fixed_effects_native",
    "run_native_dynamic_panel_gmm",
    "run_panel_2sls",
    "run_panel_model_suite",
    "run_pydynpd",
    "run_random_effects",
    "run_system_gmm",
    "stata_xtabond2_command",
    "stata_xtreg_fe_command",
    "validate_panel",
    "write_stata_parity_do_file",
]
72
+
73
+ __version__ = "0.4.1"
74
+
75
+ from .dynamic_panel import (
76
+ DynamicPanelBackendError,
77
+ run_difference_gmm,
78
+ run_dynamic_panel_gmm,
79
+ run_system_gmm,
80
+ )
systemgmmkit/cli.py ADDED
@@ -0,0 +1,71 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import pandas as pd
9
+
10
+ from .presets import aid_growth_ta_decomposition_spec, aid_growth_techshare_spec
11
+ from .reporting import model_card_markdown
12
+ from .validation import validate_panel
13
+
14
+
15
+ def _build_parser() -> argparse.ArgumentParser:
16
+ parser = argparse.ArgumentParser(
17
+ prog="systemgmmkit", description="Dynamic-panel System GMM workflow helper"
18
+ )
19
+ sub = parser.add_subparsers(dest="command", required=True)
20
+
21
+ validate = sub.add_parser("validate", help="Validate a panel dataset")
22
+ validate.add_argument("csv", type=Path)
23
+ validate.add_argument("--entity", required=True)
24
+ validate.add_argument("--time", required=True)
25
+ validate.add_argument("--vars", nargs="*", default=[])
26
+ validate.add_argument("--json", action="store_true")
27
+
28
+ preset = sub.add_parser("preset", help="Print a preset model card")
29
+ preset.add_argument("name", choices=["techshare", "ta-decomp"])
30
+ preset.add_argument("--no-controls", action="store_true")
31
+ preset.add_argument("--no-three-way", action="store_true")
32
+ preset.add_argument(
33
+ "--difference", action="store_true", help="Use Difference GMM instead of System GMM"
34
+ )
35
+
36
+ return parser
37
+
38
+
39
def main(argv: list[str] | None = None) -> int:
    """Run the ``systemgmmkit`` command-line interface.

    Args:
        argv: Argument vector to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        Process exit code: ``0`` on success, ``1`` when the CSV passed to
        ``validate`` cannot be read or parsed, ``2`` when no known
        sub-command was dispatched.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    if args.command == "validate":
        try:
            df = pd.read_csv(args.csv)
        except (
            OSError,
            UnicodeDecodeError,
            pd.errors.EmptyDataError,
            pd.errors.ParserError,
        ) as exc:
            # Report unreadable input as a normal CLI failure, not a traceback.
            print(f"systemgmmkit: cannot read {args.csv}: {exc}", file=sys.stderr)
            return 1

        report = validate_panel(df, entity=args.entity, time=args.time, variables=args.vars)
        # NOTE: --json is accepted for compatibility, but the report is emitted
        # as JSON either way; both branches previously printed the same text.
        print(json.dumps(report.to_dict(), indent=2))
        return 0

    if args.command == "preset":
        kwargs = {
            "include_controls": not args.no_controls,
            "include_three_way": not args.no_three_way,
            "system": not args.difference,
        }
        spec = (
            aid_growth_techshare_spec(**kwargs)
            if args.name == "techshare"
            else aid_growth_ta_decomposition_spec(**kwargs)
        )
        print(model_card_markdown(spec))
        return 0

    # Defensive fallback: sub-commands are required, so this is not expected.
    parser.print_help(sys.stderr)
    return 2
68
+
69
+
70
+ if __name__ == "__main__":
71
+ raise SystemExit(main())
@@ -0,0 +1,119 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+
6
@dataclass(frozen=True)
class DiagnosticCheck:
    """One immutable diagnostic row: label, observed value, verdict, guidance."""

    # Human-readable label of the diagnostic.
    name: str
    # Observed statistic, or None when it was not supplied.
    value: float | int | None
    # True/False verdict, or None when no verdict is available.
    passed: bool | None
    # Short guidance on how to read this diagnostic.
    interpretation: str
12
+
13
+
14
@dataclass(frozen=True)
class DiagnosticReport:
    """A set of diagnostic checks together with an overall recommendation."""

    checks: list[DiagnosticCheck]
    recommendation: str

    def to_markdown(self) -> str:
        """Render the checks as a Markdown table followed by the recommendation.

        Float values are shown with four significant digits, other values via
        ``str()``, and missing values as an empty cell. The verdict column
        shows ``Yes``/``No``, or an em dash when there is no verdict.
        """
        rows = ["| Diagnostic | Value | Pass | Interpretation |", "|---|---:|:---:|---|"]

        for check in self.checks:
            if check.value is None:
                shown = ""
            elif isinstance(check.value, float):
                shown = f"{check.value:.4g}"
            else:
                shown = str(check.value)

            if check.passed is None:
                verdict = "—"
            elif check.passed:
                verdict = "Yes"
            else:
                verdict = "No"

            rows.append(f"| {check.name} | {shown} | {verdict} | {check.interpretation} |")

        rows += ["", f"**Recommendation:** {self.recommendation}"]
        return "\n".join(rows)
34
+
35
+
36
def assess_diagnostics(
    *,
    ar1_p: float | None = None,
    ar2_p: float | None = None,
    hansen_p: float | None = None,
    sargan_p: float | None = None,
    diff_hansen_p: float | None = None,
    n_instruments: int | None = None,
    n_entities: int | None = None,
) -> DiagnosticReport:
    """Turn raw GMM diagnostics into pass/fail checks and a recommendation.

    Each p-value becomes one check whose verdict is ``None`` when the p-value
    is ``None``. A final instrument/entity ratio check is always appended; it
    is only evaluated when both counts are given and ``n_entities`` is
    positive.

    Returns:
        A ``DiagnosticReport`` with six checks and a recommendation chosen
        from the failing checks: an AR(2) failure takes precedence, then
        instrument-count or Hansen failures, then any other failure.
    """

    # (label, p-value, pass rule, guidance) for each p-value based diagnostic.
    p_value_rules = (
        (
            "AR(1) p-value",
            ar1_p,
            lambda p: p < 0.10,
            "Expected to be significant or near-significant in differenced errors.",
        ),
        (
            "AR(2) p-value",
            ar2_p,
            lambda p: p > 0.10,
            "Should not be significant; rejection implies invalid lag instruments.",
        ),
        (
            "Hansen p-value",
            hansen_p,
            lambda p: 0.05 < p < 0.90,
            "Should not reject, but values near 1 can indicate instrument proliferation.",
        ),
        (
            "Sargan p-value",
            sargan_p,
            lambda p: p > 0.05,
            "Useful under homoskedasticity; less reliable with robust two-step estimation.",
        ),
        (
            "Difference-in-Hansen p-value",
            diff_hansen_p,
            lambda p: p > 0.05,
            "Should not reject validity of additional system/instrument subsets.",
        ),
    )
    checks: list[DiagnosticCheck] = [
        DiagnosticCheck(label, p_value, None if p_value is None else rule(p_value), guidance)
        for label, p_value, rule, guidance in p_value_rules
    ]

    # The ratio is only meaningful with both counts and a positive denominator.
    if n_instruments is None or n_entities is None or not n_entities > 0:
        ratio: float | None = None
        ratio_ok: bool | None = None
    else:
        ratio = n_instruments / n_entities
        ratio_ok = n_instruments <= n_entities
    checks.append(
        DiagnosticCheck(
            "Instrument/entity ratio",
            ratio,
            ratio_ok,
            "Prefer instruments fewer than, or at least not materially above, number of entities.",
        )
    )

    failed = {check.name for check in checks if check.passed is False}
    if not failed:
        recommendation = "Diagnostics are broadly defensible. Interpret coefficients with normal dynamic-panel caution."
    elif "AR(2) p-value" in failed:
        recommendation = (
            "Do not rely on this specification until serial-correlation failure is resolved."
        )
    elif failed & {"Instrument/entity ratio", "Hansen p-value"}:
        recommendation = "Reduce instrument count: collapse instruments, shorten lag windows, or move weakly endogenous blocks to IV-style treatment."
    else:
        recommendation = (
            "Use as sensitivity evidence only; explain diagnostic weaknesses transparently."
        )

    return DiagnosticReport(checks=checks, recommendation=recommendation)
@@ -0,0 +1,253 @@
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ import warnings
5
+ from contextlib import suppress
6
+ from typing import Any, Literal
7
+
8
+ import pandas as pd
9
+
10
+ from .pydynpd_output_parser import enrich_result_with_parsed_standard_errors
11
+
12
+ DynamicGMMBackend = Literal["auto", "validated", "native", "pydynpd"]
13
+
14
+
15
class DynamicPanelBackendError(RuntimeError):
    """Signal that a dynamic-panel backend could not be imported or invoked."""
17
+
18
+
19
+ def _is_system_gmm(spec: Any) -> bool:
20
+ return bool(getattr(spec, "system", False))
21
+
22
+
23
+ def _append_result_note(result: Any, note: str) -> Any:
24
+ """Best-effort note attachment without assuming a mutable result class."""
25
+ with suppress(Exception):
26
+ notes = getattr(result, "notes", None)
27
+
28
+ if notes is None:
29
+ result.notes = [note]
30
+ elif isinstance(notes, list):
31
+ if note not in notes:
32
+ notes.append(note)
33
+ elif isinstance(notes, tuple) and note not in notes:
34
+ result.notes = [*notes, note]
35
+ elif not isinstance(notes, (list, tuple)):
36
+ result.notes = [str(notes), note]
37
+
38
+ return result
39
+
40
+
41
+ def _set_result_attr(result: Any, name: str, value: Any) -> Any:
42
+ """Best-effort result metadata attachment."""
43
+ with suppress(Exception):
44
+ setattr(result, name, value)
45
+
46
+ return result
47
+
48
+
49
+ def _call_pydynpd_backend(
50
+ spec: Any,
51
+ data: pd.DataFrame,
52
+ *,
53
+ entity: str,
54
+ time: str,
55
+ ) -> Any:
56
+ """Call the systemgmmkit pydynpd adapter across known signatures."""
57
+
58
+ try:
59
+ from systemgmmkit.pydynpd_backend import run_pydynpd
60
+ except Exception as exc:
61
+ raise DynamicPanelBackendError(
62
+ "The pydynpd backend could not be imported. Install optional backend "
63
+ "dependencies or use backend='native'."
64
+ ) from exc
65
+
66
+ signature = inspect.signature(run_pydynpd)
67
+ params = signature.parameters
68
+
69
+ if "panel_ids" in params:
70
+ return run_pydynpd(spec, data, panel_ids=(entity, time))
71
+
72
+ attempts = [
73
+ ("panel_ids_tuple_positional", lambda: run_pydynpd(spec, data, (entity, time))),
74
+ ("panel_ids_list_positional", lambda: run_pydynpd(spec, data, [entity, time])),
75
+ ("keywords_entity_time", lambda: run_pydynpd(spec, data, entity=entity, time=time)),
76
+ (
77
+ "keywords_entity_col_time_col",
78
+ lambda: run_pydynpd(spec, data, entity_col=entity, time_col=time),
79
+ ),
80
+ (
81
+ "keywords_id_col_time_col",
82
+ lambda: run_pydynpd(spec, data, id_col=entity, time_col=time),
83
+ ),
84
+ ("positional_entity_time", lambda: run_pydynpd(spec, data, entity, time)),
85
+ ("spec_data_only", lambda: run_pydynpd(spec, data)),
86
+ ]
87
+
88
+ errors: list[str] = []
89
+
90
+ for label, func in attempts:
91
+ try:
92
+ return func()
93
+ except TypeError as exc:
94
+ errors.append(f"{label}: {exc}")
95
+
96
+ raise DynamicPanelBackendError(
97
+ "Could not call the pydynpd backend with any supported adapter signature. "
98
+ "Attempted signatures:\n" + "\n".join(errors)
99
+ )
100
+
101
+
102
+ def _call_native_backend(
103
+ spec: Any,
104
+ data: pd.DataFrame,
105
+ *,
106
+ entity: str,
107
+ time: str,
108
+ ) -> Any:
109
+ try:
110
+ from systemgmmkit.native_gmm import run_native_dynamic_panel_gmm
111
+ except Exception as exc:
112
+ raise DynamicPanelBackendError(
113
+ "The native GMM backend could not be imported."
114
+ ) from exc
115
+
116
+ return run_native_dynamic_panel_gmm(
117
+ spec,
118
+ data,
119
+ entity=entity,
120
+ time=time,
121
+ )
122
+
123
+
124
def run_dynamic_panel_gmm(
    spec: Any,
    data: pd.DataFrame,
    *,
    entity: str,
    time: str,
    backend: DynamicGMMBackend = "auto",
) -> Any:
    """Estimate a Difference or System GMM model via the chosen backend.

    Routing
    -------
    ``"auto"`` / ``"validated"``
        System GMM specs go to the pydynpd adapter; all other specs go to
        the native backend. A routing note is attached to the result.

    ``"pydynpd"``
        Always uses the pydynpd adapter.

    ``"native"``
        Always uses the native backend. For System GMM specs a
        ``RuntimeWarning`` is issued and a note is attached, because that
        path is flagged as experimental.

    In every case the result is tagged (best effort) with ``backend`` and
    ``systemgmmkit_backend_policy`` attributes.

    Raises:
        ValueError: If ``backend`` is not one of the four supported values.
    """

    if backend not in {"auto", "validated", "native", "pydynpd"}:
        raise ValueError(
            "backend must be one of: 'auto', 'validated', 'native', 'pydynpd'."
        )

    is_system = _is_system_gmm(spec)
    policy_routed = backend in {"auto", "validated"}

    # pydynpd path: requested explicitly, or chosen by policy for System GMM.
    if backend == "pydynpd" or (policy_routed and is_system):
        result = _call_pydynpd_backend(spec, data, entity=entity, time=time)
        result = enrich_result_with_parsed_standard_errors(result)
        _set_result_attr(result, "backend", "pydynpd-via-systemgmmkit")
        _set_result_attr(result, "systemgmmkit_backend_policy", backend)
        if policy_routed:
            _append_result_note(
                result,
                "System GMM routed through the validated pydynpd adapter by systemgmmkit.",
            )
        return result

    # Native path: policy-chosen for Difference GMM, or requested explicitly.
    result = _call_native_backend(spec, data, entity=entity, time=time)
    backend_label = (
        "native-validated-via-systemgmmkit" if policy_routed else "native-via-systemgmmkit"
    )
    _set_result_attr(result, "backend", backend_label)
    _set_result_attr(result, "systemgmmkit_backend_policy", backend)

    if policy_routed:
        _append_result_note(
            result,
            "Difference GMM routed through the validated native systemgmmkit backend.",
        )
    elif is_system:
        warnings.warn(
            "Native System GMM is experimental and not yet xtabond2-certified. "
            "Use backend='auto' or backend='validated' for empirical System GMM.",
            RuntimeWarning,
            stacklevel=2,
        )
        _append_result_note(
            result,
            "Native System GMM is experimental and not yet xtabond2-certified.",
        )

    return result
206
+
207
+
208
def run_system_gmm(
    spec: Any,
    data: pd.DataFrame,
    *,
    entity: str,
    time: str,
    backend: DynamicGMMBackend = "auto",
) -> Any:
    """Estimate a System GMM model; reject specs not flagged as System GMM.

    Raises:
        ValueError: If ``spec.system`` is missing or falsy.
    """
    if _is_system_gmm(spec):
        return run_dynamic_panel_gmm(spec, data, entity=entity, time=time, backend=backend)

    raise ValueError(
        "run_system_gmm() expects a System GMM spec with spec.system=True."
    )
230
+
231
+
232
def run_difference_gmm(
    spec: Any,
    data: pd.DataFrame,
    *,
    entity: str,
    time: str,
    backend: DynamicGMMBackend = "auto",
) -> Any:
    """Estimate a Difference GMM model; reject specs flagged as System GMM.

    Raises:
        ValueError: If ``spec.system`` is truthy.
    """
    if not _is_system_gmm(spec):
        return run_dynamic_panel_gmm(spec, data, entity=entity, time=time, backend=backend)

    raise ValueError(
        "run_difference_gmm() expects a Difference GMM spec with spec.system=False."
    )