systemgmmkit 0.4.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {systemgmmkit-0.4.2/src/systemgmmkit.egg-info → systemgmmkit-0.5.0}/PKG-INFO +53 -25
  2. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/README.md +52 -24
  3. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/pyproject.toml +2 -1
  4. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/__init__.py +17 -2
  5. systemgmmkit-0.5.0/src/systemgmmkit/diagnostics/__init__.py +140 -0
  6. systemgmmkit-0.5.0/src/systemgmmkit/diagnostics/gmm.py +28 -0
  7. systemgmmkit-0.5.0/src/systemgmmkit/diagnostics/panel.py +283 -0
  8. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/diagnostics.py +2 -0
  9. systemgmmkit-0.5.0/src/systemgmmkit/estimators/first_difference.py +82 -0
  10. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/native_gmm.py +535 -22
  11. systemgmmkit-0.5.0/src/systemgmmkit/reporting/__init__.py +101 -0
  12. systemgmmkit-0.5.0/src/systemgmmkit/reporting/parity.py +134 -0
  13. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/reporting.py +1 -0
  14. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0/src/systemgmmkit.egg-info}/PKG-INFO +53 -25
  15. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit.egg-info/SOURCES.txt +9 -1
  16. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/tests/test_parity_native_gmm.py +47 -7
  17. systemgmmkit-0.5.0/tests/test_xtabond2_system_gmm_parity.py +119 -0
  18. systemgmmkit-0.5.0/tests/test_xtabond2_system_gmm_uncorrected_se_baseline.py +102 -0
  19. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/LICENSE +0 -0
  20. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/setup.cfg +0 -0
  21. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/cli.py +0 -0
  22. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/dynamic_panel.py +0 -0
  23. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/fixed_effects.py +0 -0
  24. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/gmm_parity_policy.py +0 -0
  25. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/panel_iv.py +0 -0
  26. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/parity.py +0 -0
  27. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/presets.py +0 -0
  28. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/pydynpd_backend.py +0 -0
  29. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/pydynpd_output_parser.py +0 -0
  30. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/random_effects.py +0 -0
  31. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/spec.py +0 -0
  32. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/suite.py +0 -0
  33. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/tables.py +0 -0
  34. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit/validation.py +0 -0
  35. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit.egg-info/dependency_links.txt +0 -0
  36. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit.egg-info/entry_points.txt +0 -0
  37. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit.egg-info/requires.txt +0 -0
  38. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/src/systemgmmkit.egg-info/top_level.txt +0 -0
  39. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/tests/test_apply_gmm_parity_policy_script.py +0 -0
  40. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/tests/test_fixed_effects.py +0 -0
  41. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/tests/test_generic_presets.py +0 -0
  42. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/tests/test_gmm_parity_policy.py +0 -0
  43. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/tests/test_pydynpd_backend.py +0 -0
  44. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/tests/test_pydynpd_output_parser.py +0 -0
  45. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/tests/test_random_effects_iv_tables.py +0 -0
  46. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/tests/test_spec_command.py +0 -0
  47. {systemgmmkit-0.4.2 → systemgmmkit-0.5.0}/tests/test_validation_diagnostics.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: systemgmmkit
3
- Version: 0.4.2
3
+ Version: 0.5.0
4
4
  Summary: Generic panel-data econometrics workflow helpers for FE, RE, IV/2SLS, and Difference/System GMM in Python.
5
5
  Author: Oluwajuwon Mayomi Akanbi
6
6
  License-Expression: MIT
@@ -47,7 +47,7 @@ Dynamic: license-file
47
47
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
48
48
  [![CI](https://github.com/Akanom/systemgmmkit/actions/workflows/ci.yml/badge.svg)](https://github.com/Akanom/systemgmmkit/actions/workflows/ci.yml)
49
49
  [![Publish](https://github.com/Akanom/systemgmmkit/actions/workflows/publish.yml/badge.svg)](https://github.com/Akanom/systemgmmkit/actions/workflows/publish.yml)
50
- [![Downloads](https://static.pepy.tech/badge/systemgmmkit/month)](https://pepy.tech/project/systemgmmkit)
50
+ [![Downloads](https://img.shields.io/pepy/dm/systemgmmkit)](https://pepy.tech/project/systemgmmkit)
51
51
  `systemgmmkit` is a Python workflow package for panel-data econometrics.
52
52
 
53
53
  It supports reusable model specification, panel validation, static panel estimation, dynamic-panel GMM estimation, backend routing, diagnostics interpretation, reproducible reporting, and regression-table export.
@@ -91,7 +91,7 @@ The package then routes estimation through the appropriate backend.
91
91
  * public `run_system_gmm()` and `run_difference_gmm()` convenience functions;
92
92
  * optional validated backend adapter integration for System GMM;
93
93
  * native Difference GMM estimation;
94
- * experimental native System GMM estimation;
94
+ * native System GMM estimation with verified `xtabond2` baseline parity checks, including Windmeijer-corrected two-step standard-error parity on the certified benchmark;
95
95
  * model-card style reporting for reproducibility;
96
96
  * regression-table export to Markdown, CSV, and LaTeX;
97
97
  * Stata parity-check scaffolding for `xtreg, fe` and `xtabond2` replication workflows.
@@ -100,16 +100,18 @@ The package then routes estimation through the appropriate backend.
100
100
 
101
101
  ## Current validation status
102
102
 
103
- | Estimator | Current status | Interpretation |
104
- | ------------------------------- | ----------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
105
- | Static panel estimators | Active development | Pooled OLS, Fixed Effects, Random Effects, and Panel IV / 2SLS are available for applied workflow use and should be validated against reference packages for critical work. |
106
- | Native Difference GMM | Strict parity passed on current benchmark | Native Difference GMM matches the current validation backend and Stata oracle within numerical tolerance on the tested benchmark. |
107
- | Native System GMM | Experimental parity pending | Native System GMM runs, preserves observation and instrument counts, and passes construction checks, but coefficient-level parity with `xtabond2` is not yet certified. |
108
- | System GMM via `backend="auto"` | Recommended empirical route | System GMM is called through the public `systemgmmkit` API and routed internally to the validated backend adapter. |
103
+ | Estimator | Current status | Interpretation |
104
+ | ------------------------------- | --------------------------------------------------- | -------------- |
105
+ | Static panel estimators | Active development | Pooled OLS, Fixed Effects, Random Effects, and Panel IV / 2SLS are available for applied workflow use and should be validated against reference packages for critical work. |
106
+ | Native Difference GMM | Strict parity passed on current benchmark | Native Difference GMM matches the current validation backend and Stata oracle within numerical tolerance on the tested benchmark. |
107
+ | Native System GMM | `xtabond2` baseline and Windmeijer SE parity passed | Native System GMM matches `xtabond2` on the current collapsed two-step System GMM benchmark for coefficients, raw residual moments (`Z'u`), group-scaled two-step weighting matrix (`A2 / n_groups`), Hansen J, and Windmeijer-corrected two-step standard errors. |
108
+ | System GMM via `backend="auto"` | Stable public workflow route | `backend="auto"` remains the recommended public workflow route unless the user needs explicit native/adapter comparison. Users who need exact replication should report the selected backend and validation benchmark. |
109
109
 
110
- The current validation harness confirms that native Difference GMM passes strict parity on the benchmark specification.
110
+ The current validation harness confirms strict parity for native Difference GMM on the benchmark specification.
111
111
 
112
- Native System GMM is intentionally marked as experimental until broader coefficient-parity tests pass across multiple datasets, panel structures, lag windows, missing-data patterns, and specifications.
112
+ Native System GMM now passes a dedicated `xtabond2` baseline parity benchmark. The verified benchmark covers coefficient estimates, raw residual moments (`Z'u`), the group-scaled two-step weighting matrix (`A2 / n_groups`), the Hansen J statistic, and Windmeijer-corrected two-step standard errors.
113
+
114
+ This should be interpreted as a strong benchmark-specific parity result, not as a universal claim of Stata identity across every possible dataset, lag window, missing-data pattern, instrument classification, covariance assumption, or finite-sample correction. Broader specification coverage remains on the validation roadmap.
113
115
 
114
116
  ---
115
117
 
@@ -123,18 +125,18 @@ Users should call the public API:
123
125
  from systemgmmkit import run_system_gmm, run_difference_gmm
124
126
  ```
125
127
 
126
- The package then routes estimation through the appropriate backend.
128
+ The package then routes estimation through the selected backend.
127
129
 
128
- | User option | Difference GMM behavior | System GMM behavior |
129
- | --------------------- | ------------------------------------------------------------- | ---------------------------------------------------------------- |
130
- | `backend="auto"` | Uses the validated native `systemgmmkit` Difference GMM path. | Routes through the validated backend adapter via `systemgmmkit`. |
131
- | `backend="validated"` | Uses the validated native `systemgmmkit` Difference GMM path. | Routes through the validated backend adapter via `systemgmmkit`. |
132
- | `backend="native"` | Uses the native `systemgmmkit` engine. | Uses the native `systemgmmkit` engine, currently experimental. |
133
- | `backend="pydynpd"` | Explicitly routes through the backend adapter. | Explicitly routes through the backend adapter. |
130
+ | User option | Difference GMM behavior | System GMM behavior |
131
+ | --------------------- | ------------------------------------------------------------- | ------------------- |
132
+ | `backend="auto"` | Uses the validated native `systemgmmkit` Difference GMM path. | Uses the package's configured stable System GMM route. This is the recommended default workflow unless the user needs a specific backend. |
133
+ | `backend="validated"` | Uses the validated native `systemgmmkit` Difference GMM path. | Routes through the validated backend adapter where available. |
134
+ | `backend="native"` | Uses the native `systemgmmkit` engine. | Uses the native `systemgmmkit` engine. The current `xtabond2` parity benchmark is passed for collapsed two-step System GMM coefficients, moments, group-scaled A2, Hansen J, and Windmeijer-corrected two-step standard errors. |
135
+ | `backend="pydynpd"` | Explicitly routes through the backend adapter. | Explicitly routes through the backend adapter. |
134
136
 
135
- This design keeps `systemgmmkit` as the stable public interface while allowing backend routing internally.
137
+ This design keeps `systemgmmkit` as the stable public interface while allowing explicit backend selection for replication, benchmarking, and sensitivity analysis.
136
138
 
137
- For empirical System GMM work requiring the strongest validation, use:
139
+ For empirical System GMM work, a typical public workflow is:
138
140
 
139
141
  ```python
140
142
  result = run_system_gmm(
@@ -146,7 +148,7 @@ result = run_system_gmm(
146
148
  )
147
149
  ```
148
150
 
149
- This keeps the user workflow inside `systemgmmkit` while routing internally to the validated backend path.
151
+ For strict native replication of the current `xtabond2` parity benchmark, use `backend="native"` and match the sample, lag windows, collapsed-instrument setting, IV treatment, time-dummy treatment, transformation, covariance assumptions, and estimation options.
150
152
 
151
153
  ---
152
154
 
@@ -174,7 +176,7 @@ The construction logic has been validated across:
174
176
  * specifications with and without standard IV controls;
175
177
  * single and multiple GMM-style instrument blocks.
176
178
 
177
- This is a construction-architecture milestone, not a final claim of universal System GMM coefficient parity.
179
+ This construction architecture now supports the current native System GMM `xtabond2` baseline parity result. It should still be interpreted conservatively: the benchmark verifies a specific collapsed two-step System GMM specification, not universal equivalence across all possible panel designs and covariance corrections.
178
180
 
179
181
  ---
180
182
 
@@ -376,7 +378,7 @@ result = run_system_gmm(
376
378
 
377
379
  System GMM follows the Blundell-Bond dynamic-panel structure and combines transformed-equation moments with level-equation moments.
378
380
 
379
- Native System GMM is currently experimental. Use `backend="auto"` for empirical System GMM workflows requiring stronger external validation through the package’s validated backend route.
381
+ Native System GMM now passes a dedicated `xtabond2` benchmark for collapsed two-step System GMM coefficients, residual moments, group-scaled two-step weighting matrix, Hansen J, and Windmeijer-corrected two-step standard errors. Broader specification coverage remains under validation, so users should report the backend, model specification, instrument count, covariance type, and validation context for critical empirical work.
380
382
 
381
383
  ---
382
384
 
@@ -685,7 +687,7 @@ Variable classification is an econometric assumption.
685
687
  Supported native GMM features include:
686
688
 
687
689
  * Difference GMM;
688
- * experimental System GMM;
690
+ * System GMM with verified `xtabond2` baseline parity for the current collapsed two-step benchmark, including Windmeijer-corrected two-step standard-error parity;
689
691
  * collapsed instruments;
690
692
  * restricted lag windows;
691
693
  * one-step and two-step estimation paths;
@@ -696,6 +698,17 @@ Supported native GMM features include:
696
698
 
697
699
  The native backend is intended to provide a transparent Python implementation that can be inspected, tested, and extended without relying only on an external backend.
698
700
 
701
+ The native System GMM parity benchmark currently verifies:
702
+
703
+ * coefficient estimates against `xtabond2`;
704
+ * raw residual moments (`Z'u`) after instrument-order mapping;
705
+ * two-step weighting matrix alignment after group scaling (`A2 / n_groups`);
706
+ * Hansen J statistic alignment;
707
+ * Windmeijer-corrected two-step standard-error alignment against Stata `e(V)`;
708
+ * regression protection for the benchmark through an automated pytest test.
709
+
710
+ The remaining high-priority validation work is broader benchmark coverage across alternative datasets, lag windows, missing-data structures, instrument classifications, covariance assumptions, and diagnostic outputs.
711
+
699
712
  ---
700
713
 
701
714
  ## Backend adapter
@@ -763,7 +776,7 @@ For dynamic-panel GMM, users should record at minimum:
763
776
 
764
777
  ## Validation roadmap
765
778
 
766
- Before claiming broader production certification across panel designs, the package should be tested on:
779
+ Before claiming broader production certification across panel designs, the package should continue to be tested on:
767
780
 
768
781
  * balanced panels;
769
782
  * unbalanced panels;
@@ -779,6 +792,13 @@ Before claiming broader production certification across panel designs, the packa
779
792
  * alternative instrument classifications;
780
793
  * Stata `xtabond2` replication benchmarks.
781
794
 
795
+ High-priority remaining validation items:
796
+
797
+ * broader System GMM parity across multiple specifications;
798
+ * broader Windmeijer-corrected standard-error parity across multiple specifications;
799
+ * robustness of AR(1), AR(2), Sargan, and Hansen diagnostics across panel structures;
800
+ * documentation of exact Stata-compatible options and known non-equivalence cases.
801
+
782
802
  This roadmap protects the package from overclaiming and supports academically defensible validation.
783
803
 
784
804
  ---
@@ -816,3 +836,11 @@ Estimation was performed using systemgmmkit version X.Y.Z, commit <commit-hash>.
816
836
 
817
837
 
818
838
 
839
+
840
+
841
+
842
+
843
+
844
+
845
+
846
+
@@ -5,7 +5,7 @@
5
5
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
6
6
  [![CI](https://github.com/Akanom/systemgmmkit/actions/workflows/ci.yml/badge.svg)](https://github.com/Akanom/systemgmmkit/actions/workflows/ci.yml)
7
7
  [![Publish](https://github.com/Akanom/systemgmmkit/actions/workflows/publish.yml/badge.svg)](https://github.com/Akanom/systemgmmkit/actions/workflows/publish.yml)
8
- [![Downloads](https://static.pepy.tech/badge/systemgmmkit/month)](https://pepy.tech/project/systemgmmkit)
8
+ [![Downloads](https://img.shields.io/pepy/dm/systemgmmkit)](https://pepy.tech/project/systemgmmkit)
9
9
  `systemgmmkit` is a Python workflow package for panel-data econometrics.
10
10
 
11
11
  It supports reusable model specification, panel validation, static panel estimation, dynamic-panel GMM estimation, backend routing, diagnostics interpretation, reproducible reporting, and regression-table export.
@@ -49,7 +49,7 @@ The package then routes estimation through the appropriate backend.
49
49
  * public `run_system_gmm()` and `run_difference_gmm()` convenience functions;
50
50
  * optional validated backend adapter integration for System GMM;
51
51
  * native Difference GMM estimation;
52
- * experimental native System GMM estimation;
52
+ * native System GMM estimation with verified `xtabond2` baseline parity checks, including Windmeijer-corrected two-step standard-error parity on the certified benchmark;
53
53
  * model-card style reporting for reproducibility;
54
54
  * regression-table export to Markdown, CSV, and LaTeX;
55
55
  * Stata parity-check scaffolding for `xtreg, fe` and `xtabond2` replication workflows.
@@ -58,16 +58,18 @@ The package then routes estimation through the appropriate backend.
58
58
 
59
59
  ## Current validation status
60
60
 
61
- | Estimator | Current status | Interpretation |
62
- | ------------------------------- | ----------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
63
- | Static panel estimators | Active development | Pooled OLS, Fixed Effects, Random Effects, and Panel IV / 2SLS are available for applied workflow use and should be validated against reference packages for critical work. |
64
- | Native Difference GMM | Strict parity passed on current benchmark | Native Difference GMM matches the current validation backend and Stata oracle within numerical tolerance on the tested benchmark. |
65
- | Native System GMM | Experimental parity pending | Native System GMM runs, preserves observation and instrument counts, and passes construction checks, but coefficient-level parity with `xtabond2` is not yet certified. |
66
- | System GMM via `backend="auto"` | Recommended empirical route | System GMM is called through the public `systemgmmkit` API and routed internally to the validated backend adapter. |
61
+ | Estimator | Current status | Interpretation |
62
+ | ------------------------------- | --------------------------------------------------- | -------------- |
63
+ | Static panel estimators | Active development | Pooled OLS, Fixed Effects, Random Effects, and Panel IV / 2SLS are available for applied workflow use and should be validated against reference packages for critical work. |
64
+ | Native Difference GMM | Strict parity passed on current benchmark | Native Difference GMM matches the current validation backend and Stata oracle within numerical tolerance on the tested benchmark. |
65
+ | Native System GMM | `xtabond2` baseline and Windmeijer SE parity passed | Native System GMM matches `xtabond2` on the current collapsed two-step System GMM benchmark for coefficients, raw residual moments (`Z'u`), group-scaled two-step weighting matrix (`A2 / n_groups`), Hansen J, and Windmeijer-corrected two-step standard errors. |
66
+ | System GMM via `backend="auto"` | Stable public workflow route | `backend="auto"` remains the recommended public workflow route unless the user needs explicit native/adapter comparison. Users who need exact replication should report the selected backend and validation benchmark. |
67
67
 
68
- The current validation harness confirms that native Difference GMM passes strict parity on the benchmark specification.
68
+ The current validation harness confirms strict parity for native Difference GMM on the benchmark specification.
69
69
 
70
- Native System GMM is intentionally marked as experimental until broader coefficient-parity tests pass across multiple datasets, panel structures, lag windows, missing-data patterns, and specifications.
70
+ Native System GMM now passes a dedicated `xtabond2` baseline parity benchmark. The verified benchmark covers coefficient estimates, raw residual moments (`Z'u`), the group-scaled two-step weighting matrix (`A2 / n_groups`), the Hansen J statistic, and Windmeijer-corrected two-step standard errors.
71
+
72
+ This should be interpreted as a strong benchmark-specific parity result, not as a universal claim of Stata identity across every possible dataset, lag window, missing-data pattern, instrument classification, covariance assumption, or finite-sample correction. Broader specification coverage remains on the validation roadmap.
71
73
 
72
74
  ---
73
75
 
@@ -81,18 +83,18 @@ Users should call the public API:
81
83
  from systemgmmkit import run_system_gmm, run_difference_gmm
82
84
  ```
83
85
 
84
- The package then routes estimation through the appropriate backend.
86
+ The package then routes estimation through the selected backend.
85
87
 
86
- | User option | Difference GMM behavior | System GMM behavior |
87
- | --------------------- | ------------------------------------------------------------- | ---------------------------------------------------------------- |
88
- | `backend="auto"` | Uses the validated native `systemgmmkit` Difference GMM path. | Routes through the validated backend adapter via `systemgmmkit`. |
89
- | `backend="validated"` | Uses the validated native `systemgmmkit` Difference GMM path. | Routes through the validated backend adapter via `systemgmmkit`. |
90
- | `backend="native"` | Uses the native `systemgmmkit` engine. | Uses the native `systemgmmkit` engine, currently experimental. |
91
- | `backend="pydynpd"` | Explicitly routes through the backend adapter. | Explicitly routes through the backend adapter. |
88
+ | User option | Difference GMM behavior | System GMM behavior |
89
+ | --------------------- | ------------------------------------------------------------- | ------------------- |
90
+ | `backend="auto"` | Uses the validated native `systemgmmkit` Difference GMM path. | Uses the package's configured stable System GMM route. This is the recommended default workflow unless the user needs a specific backend. |
91
+ | `backend="validated"` | Uses the validated native `systemgmmkit` Difference GMM path. | Routes through the validated backend adapter where available. |
92
+ | `backend="native"` | Uses the native `systemgmmkit` engine. | Uses the native `systemgmmkit` engine. The current `xtabond2` parity benchmark is passed for collapsed two-step System GMM coefficients, moments, group-scaled A2, Hansen J, and Windmeijer-corrected two-step standard errors. |
93
+ | `backend="pydynpd"` | Explicitly routes through the backend adapter. | Explicitly routes through the backend adapter. |
92
94
 
93
- This design keeps `systemgmmkit` as the stable public interface while allowing backend routing internally.
95
+ This design keeps `systemgmmkit` as the stable public interface while allowing explicit backend selection for replication, benchmarking, and sensitivity analysis.
94
96
 
95
- For empirical System GMM work requiring the strongest validation, use:
97
+ For empirical System GMM work, a typical public workflow is:
96
98
 
97
99
  ```python
98
100
  result = run_system_gmm(
@@ -104,7 +106,7 @@ result = run_system_gmm(
104
106
  )
105
107
  ```
106
108
 
107
- This keeps the user workflow inside `systemgmmkit` while routing internally to the validated backend path.
109
+ For strict native replication of the current `xtabond2` parity benchmark, use `backend="native"` and match the sample, lag windows, collapsed-instrument setting, IV treatment, time-dummy treatment, transformation, covariance assumptions, and estimation options.
108
110
 
109
111
  ---
110
112
 
@@ -132,7 +134,7 @@ The construction logic has been validated across:
132
134
  * specifications with and without standard IV controls;
133
135
  * single and multiple GMM-style instrument blocks.
134
136
 
135
- This is a construction-architecture milestone, not a final claim of universal System GMM coefficient parity.
137
+ This construction architecture now supports the current native System GMM `xtabond2` baseline parity result. It should still be interpreted conservatively: the benchmark verifies a specific collapsed two-step System GMM specification, not universal equivalence across all possible panel designs and covariance corrections.
136
138
 
137
139
  ---
138
140
 
@@ -334,7 +336,7 @@ result = run_system_gmm(
334
336
 
335
337
  System GMM follows the Blundell-Bond dynamic-panel structure and combines transformed-equation moments with level-equation moments.
336
338
 
337
- Native System GMM is currently experimental. Use `backend="auto"` for empirical System GMM workflows requiring stronger external validation through the package’s validated backend route.
339
+ Native System GMM now passes a dedicated `xtabond2` benchmark for collapsed two-step System GMM coefficients, residual moments, group-scaled two-step weighting matrix, Hansen J, and Windmeijer-corrected two-step standard errors. Broader specification coverage remains under validation, so users should report the backend, model specification, instrument count, covariance type, and validation context for critical empirical work.
338
340
 
339
341
  ---
340
342
 
@@ -643,7 +645,7 @@ Variable classification is an econometric assumption.
643
645
  Supported native GMM features include:
644
646
 
645
647
  * Difference GMM;
646
- * experimental System GMM;
648
+ * System GMM with verified `xtabond2` baseline parity for the current collapsed two-step benchmark, including Windmeijer-corrected two-step standard-error parity;
647
649
  * collapsed instruments;
648
650
  * restricted lag windows;
649
651
  * one-step and two-step estimation paths;
@@ -654,6 +656,17 @@ Supported native GMM features include:
654
656
 
655
657
  The native backend is intended to provide a transparent Python implementation that can be inspected, tested, and extended without relying only on an external backend.
656
658
 
659
+ The native System GMM parity benchmark currently verifies:
660
+
661
+ * coefficient estimates against `xtabond2`;
662
+ * raw residual moments (`Z'u`) after instrument-order mapping;
663
+ * two-step weighting matrix alignment after group scaling (`A2 / n_groups`);
664
+ * Hansen J statistic alignment;
665
+ * Windmeijer-corrected two-step standard-error alignment against Stata `e(V)`;
666
+ * regression protection for the benchmark through an automated pytest test.
667
+
668
+ The remaining high-priority validation work is broader benchmark coverage across alternative datasets, lag windows, missing-data structures, instrument classifications, covariance assumptions, and diagnostic outputs.
669
+
657
670
  ---
658
671
 
659
672
  ## Backend adapter
@@ -721,7 +734,7 @@ For dynamic-panel GMM, users should record at minimum:
721
734
 
722
735
  ## Validation roadmap
723
736
 
724
- Before claiming broader production certification across panel designs, the package should be tested on:
737
+ Before claiming broader production certification across panel designs, the package should continue to be tested on:
725
738
 
726
739
  * balanced panels;
727
740
  * unbalanced panels;
@@ -737,6 +750,13 @@ Before claiming broader production certification across panel designs, the packa
737
750
  * alternative instrument classifications;
738
751
  * Stata `xtabond2` replication benchmarks.
739
752
 
753
+ High-priority remaining validation items:
754
+
755
+ * broader System GMM parity across multiple specifications;
756
+ * broader Windmeijer-corrected standard-error parity across multiple specifications;
757
+ * robustness of AR(1), AR(2), Sargan, and Hansen diagnostics across panel structures;
758
+ * documentation of exact Stata-compatible options and known non-equivalence cases.
759
+
740
760
  This roadmap protects the package from overclaiming and supports academically defensible validation.
741
761
 
742
762
  ---
@@ -774,3 +794,11 @@ Estimation was performed using systemgmmkit version X.Y.Z, commit <commit-hash>.
774
794
 
775
795
 
776
796
 
797
+
798
+
799
+
800
+
801
+
802
+
803
+
804
+
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "systemgmmkit"
7
- version = "0.4.2"
7
+ version = "0.5.0"
8
8
  description = "Generic panel-data econometrics workflow helpers for FE, RE, IV/2SLS, and Difference/System GMM in Python."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -66,3 +66,4 @@ quote-style = "double"
66
66
  indent-style = "space"
67
67
  line-ending = "auto"
68
68
 
69
+
@@ -68,9 +68,16 @@ __all__ = [
68
68
  "DynamicPanelBackendError",
69
69
  "run_dynamic_panel_gmm",
70
70
  "run_system_gmm",
71
- "run_difference_gmm",]
71
+ "run_difference_gmm", "FirstDifferenceResult",
72
+ "ParityReport",
73
+ "ParityResult",
74
+ "classify_parity_result",
75
+ "first_difference",
76
+ ]
72
77
 
73
- __version__ = "0.4.1"
78
+ __version__ = "0.5.0"
79
+
80
+ import contextlib
74
81
 
75
82
  from .dynamic_panel import (
76
83
  DynamicPanelBackendError,
@@ -78,3 +85,11 @@ from .dynamic_panel import (
78
85
  run_dynamic_panel_gmm,
79
86
  run_system_gmm,
80
87
  )
88
+
89
# FirstDifferenceResult and first_difference are imported unconditionally.
# The original wrapped this import in contextlib.suppress(Exception) and then
# repeated it unguarded, so a failure always propagated anyway; a single
# unguarded import states that behavior directly.
from .estimators.first_difference import FirstDifferenceResult, first_difference

# Best-effort import of the parity-reporting helpers: any exception raised
# while importing .reporting is swallowed, leaving these three names undefined
# in the package namespace.
# NOTE(review): these names are also listed in __all__, so
# `from systemgmmkit import *` would fail if this import is suppressed --
# confirm whether the import should be unconditional as well.
with contextlib.suppress(Exception):
    from .reporting import ParityReport, ParityResult, classify_parity_result
@@ -0,0 +1,140 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from .gmm import GmmDiagnostics
6
+ from .panel import (
7
+ DiagnosticResult,
8
+ breusch_pagan_lm,
9
+ hausman_fe_re,
10
+ modified_wald_groupwise_heteroskedasticity,
11
+ pesaran_cd,
12
+ wooldridge_serial_correlation,
13
+ )
14
+
15
+
16
+ @dataclass(frozen=True)
17
+ class DiagnosticCheck:
18
+ name: str
19
+ value: float | int | None
20
+ passed: bool | None
21
+ interpretation: str
22
+
23
+
24
+ @dataclass(frozen=True)
25
+ class DiagnosticReport:
26
+ checks: list[DiagnosticCheck]
27
+ recommendation: str
28
+
29
+ def to_markdown(self) -> str:
30
+ lines = ["| Diagnostic | Value | Pass | Interpretation |", "|---|---:|:---:|---|"]
31
+ for c in self.checks:
32
+ value = (
33
+ ""
34
+ if c.value is None
35
+ else f"{c.value:.4g}"
36
+ if isinstance(c.value, float)
37
+ else str(c.value)
38
+ )
39
+ passed = "—" if c.passed is None else "Yes" if c.passed else "No"
40
+ lines.append(f"| {c.name} | {value} | {passed} | {c.interpretation} |")
41
+ lines.append("")
42
+ lines.append(f"**Recommendation:** {self.recommendation}")
43
+ return "\n".join(lines)
44
+
45
+
46
+ def assess_diagnostics(
47
+ *,
48
+ ar1_p: float | None = None,
49
+ ar2_p: float | None = None,
50
+ hansen_p: float | None = None,
51
+ sargan_p: float | None = None,
52
+ diff_hansen_p: float | None = None,
53
+ n_instruments: int | None = None,
54
+ n_entities: int | None = None,
55
+ ) -> DiagnosticReport:
56
+ checks: list[DiagnosticCheck] = []
57
+
58
+ checks.append(
59
+ DiagnosticCheck(
60
+ "AR(1) p-value",
61
+ ar1_p,
62
+ None if ar1_p is None else ar1_p < 0.10,
63
+ "Expected to be significant or near-significant in differenced errors.",
64
+ )
65
+ )
66
+ checks.append(
67
+ DiagnosticCheck(
68
+ "AR(2) p-value",
69
+ ar2_p,
70
+ None if ar2_p is None else ar2_p > 0.10,
71
+ "Should not be significant; rejection implies invalid lag instruments.",
72
+ )
73
+ )
74
+ checks.append(
75
+ DiagnosticCheck(
76
+ "Hansen p-value",
77
+ hansen_p,
78
+ None if hansen_p is None else 0.05 < hansen_p < 0.90,
79
+ "Should not reject, but values near 1 can indicate instrument proliferation.",
80
+ )
81
+ )
82
+ checks.append(
83
+ DiagnosticCheck(
84
+ "Sargan p-value",
85
+ sargan_p,
86
+ None if sargan_p is None else sargan_p > 0.05,
87
+ "Useful under homoskedasticity; less reliable with robust two-step estimation.",
88
+ )
89
+ )
90
+ checks.append(
91
+ DiagnosticCheck(
92
+ "Difference-in-Hansen p-value",
93
+ diff_hansen_p,
94
+ None if diff_hansen_p is None else diff_hansen_p > 0.05,
95
+ "Should not reject validity of additional system/instrument subsets.",
96
+ )
97
+ )
98
+
99
+ instrument_pass: bool | None = None
100
+ instrument_value: float | None = None
101
+
102
+ if n_instruments is not None and n_entities is not None and n_entities > 0:
103
+ instrument_value = n_instruments / n_entities
104
+ instrument_pass = n_instruments <= n_entities
105
+
106
+ checks.append(
107
+ DiagnosticCheck(
108
+ "Instrument/entity ratio",
109
+ instrument_value,
110
+ instrument_pass,
111
+ "Prefer instruments fewer than, or at least not materially above, number of entities.",
112
+ )
113
+ )
114
+
115
+ failures = [c.name for c in checks if c.passed is False]
116
+
117
+ if not failures:
118
+ recommendation = "Diagnostics are broadly defensible. Interpret coefficients with normal dynamic-panel caution."
119
+ elif "AR(2) p-value" in failures:
120
+ recommendation = "Do not rely on this specification until serial-correlation failure is resolved."
121
+ elif "Instrument/entity ratio" in failures or "Hansen p-value" in failures:
122
+ recommendation = "Reduce instrument count: collapse instruments, shorten lag windows, or move weakly endogenous blocks to IV-style treatment."
123
+ else:
124
+ recommendation = "Use as sensitivity evidence only; explain diagnostic weaknesses transparently."
125
+
126
+ return DiagnosticReport(checks=checks, recommendation=recommendation)
127
+
128
+
129
+ __all__ = [
130
+ "DiagnosticCheck",
131
+ "DiagnosticReport",
132
+ "DiagnosticResult",
133
+ "GmmDiagnostics",
134
+ "assess_diagnostics",
135
+ "breusch_pagan_lm",
136
+ "hausman_fe_re",
137
+ "modified_wald_groupwise_heteroskedasticity",
138
+ "pesaran_cd",
139
+ "wooldridge_serial_correlation",
140
+ ]
@@ -0,0 +1,28 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+
6
@dataclass(frozen=True)
class GmmDiagnostics:
    """Immutable container for GMM test p-values and instrument/group counts.

    Every field is optional; a missing value is stored as ``None`` and is
    skipped by the derived properties.
    """

    # p-values of the serial-correlation tests.
    ar1_pvalue: float | None = None
    ar2_pvalue: float | None = None
    # p-values of the overidentification tests.
    hansen_pvalue: float | None = None
    sargan_pvalue: float | None = None
    diff_hansen_pvalue: float | None = None
    # Counts used for the instrument-pressure ratio.
    n_instruments: int | None = None
    n_groups: int | None = None

    @property
    def instrument_pressure_ratio(self) -> float | None:
        """Return instruments per group, or ``None`` when it is undefined.

        The ratio is undefined when either count is missing or when
        ``n_groups`` is zero.
        """
        if self.n_instruments is None or self.n_groups in (None, 0):
            return None
        return self.n_instruments / self.n_groups

    @property
    def passes_basic_gmm_diagnostics(self) -> bool:
        """Return ``False`` if any available basic diagnostic fails.

        A diagnostic fails when the AR(2) or Hansen p-value is below 0.05,
        or when instruments outnumber groups. Missing values are skipped.
        """
        if self.ar2_pvalue is not None and self.ar2_pvalue < 0.05:
            return False
        if self.hansen_pvalue is not None and self.hansen_pvalue < 0.05:
            return False
        ratio = self.instrument_pressure_ratio
        # Instruments equal to the number of groups is still acceptable;
        # only a strictly larger instrument count is a failure.
        return not (ratio is not None and ratio > 1.0)