trop 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
trop/__init__.py CHANGED
@@ -1,3 +1,9 @@
1
1
  from .estimator import TROP_TWFE_average
2
+ from .cv import TROP_cv_single, TROP_cv_cycle, TROP_cv_joint
2
3
 
3
- __all__ = ["TROP_TWFE_average"]
4
+ __all__ = [
5
+ "TROP_TWFE_average",
6
+ "TROP_cv_single",
7
+ "TROP_cv_cycle",
8
+ "TROP_cv_joint",
9
+ ]
trop/cv.py ADDED
@@ -0,0 +1,427 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Iterable, Optional, Sequence, Tuple, Union, List
5
+
6
+ import numpy as np
7
+ from joblib import Parallel, delayed
8
+
9
+ from .estimator import TROP_TWFE_average
10
+
11
+
12
+ ArrayLike = Union[np.ndarray, Sequence[Sequence[float]]]
13
+
14
+
15
+ def _validate_panel(Y: np.ndarray, treated_periods: int, n_treated_units: int) -> None:
16
+ """
17
+ Validate panel dimensions and basic placebo-treatment parameters.
18
+
19
+ Parameters
20
+ ----------
21
+ Y:
22
+ Outcome panel of shape (N, T).
23
+ treated_periods:
24
+ Number of treated (post) periods assumed to be the final columns of the panel.
25
+ Must satisfy 1 <= treated_periods < T.
26
+ n_treated_units:
27
+ Number of treated units to sample without replacement from {0, ..., N-1}.
28
+ Must satisfy 1 <= n_treated_units < N.
29
+
30
+ Raises
31
+ ------
32
+ ValueError
33
+ If Y is not 2D, or if treated_periods / n_treated_units are out of range.
34
+ """
35
+ if Y.ndim != 2:
36
+ raise ValueError("Y must be a 2D array of shape (N, T).")
37
+ N, T = Y.shape
38
+ if treated_periods <= 0 or treated_periods >= T:
39
+ raise ValueError(f"treated_periods must be in [1, T-1]. Got treated_periods={treated_periods}, T={T}.")
40
+ if n_treated_units <= 0 or n_treated_units >= N:
41
+ raise ValueError(f"n_treated_units must be in [1, N-1]. Got n_treated_units={n_treated_units}, N={N}.")
42
+
43
+
44
+ def _as_list(grid: Iterable[float]) -> List[float]:
45
+ """
46
+ Convert a lambda grid iterable into a non-empty list of floats.
47
+
48
+ Parameters
49
+ ----------
50
+ grid:
51
+ Iterable of candidate lambda values.
52
+
53
+ Returns
54
+ -------
55
+ List[float]
56
+ The grid converted to a list of floats.
57
+
58
+ Raises
59
+ ------
60
+ ValueError
61
+ If the grid is empty.
62
+ """
63
+ grid_list = list(grid)
64
+ if len(grid_list) == 0:
65
+ raise ValueError("lambda_grid must be non-empty.")
66
+ grid_list = [float(x) for x in grid_list]
67
+ return grid_list
68
+
69
+
70
+
71
def _simulate_ate(
    seed: int,
    Y: np.ndarray,
    n_treated_units: int,
    treated_periods: int,
    lambda_unit: float,
    lambda_time: float,
    lambda_nn: float,
    solver: Optional[str] = None,
    verbose: bool = False,
) -> float:
    """
    Run one placebo trial and return the estimate from TROP_TWFE_average.

    A generator seeded with `seed` draws `n_treated_units` distinct row indices
    of `Y`. Those rows are marked as treated in the last `treated_periods`
    columns of a 0/1 assignment matrix, and the panel is handed to
    `TROP_TWFE_average` together with the three lambdas.

    Parameters
    ----------
    seed:
        Seed for the trial's own random generator (makes the trial reproducible).
    Y:
        2D outcome panel; rows are sampled as placebo-treated units.
    n_treated_units:
        Number of rows to draw without replacement.
    treated_periods:
        Number of trailing columns flagged as treated.
    lambda_unit, lambda_time, lambda_nn:
        Tuning parameters forwarded unchanged to TROP_TWFE_average.
    solver, verbose:
        Forwarded unchanged to TROP_TWFE_average.

    Returns
    -------
    float
        Whatever TROP_TWFE_average returns for the placebo assignment.
    """
    n_units, _ = Y.shape
    placebo_units = np.random.default_rng(seed).choice(
        n_units, size=n_treated_units, replace=False
    )

    # Placebo treatment indicator: sampled rows, trailing `treated_periods` columns.
    assignment = np.zeros_like(Y, dtype=float)
    assignment[placebo_units, -treated_periods:] = 1.0

    estimate = TROP_TWFE_average(
        Y=Y,
        W=assignment,
        treated_units=placebo_units,
        lambda_unit=lambda_unit,
        lambda_time=lambda_time,
        lambda_nn=lambda_nn,
        treated_periods=treated_periods,
        solver=solver,
        verbose=verbose,
    )
    return estimate
103
+
104
+
105
def TROP_cv_single(
    Y_control: ArrayLike,
    n_treated_units: int,
    treated_periods: int,
    fixed_lambdas: Tuple[float, float] = (0.0, 0.0),
    lambda_grid: Optional[Iterable[float]] = None,
    lambda_cv: str = "unit",
    *,
    n_trials: int = 200,
    n_jobs: int = -1,
    prefer: str = "threads",
    random_seed: int = 0,
    solver: Optional[str] = None,
    verbose: bool = False,
) -> float:
    """
    Cross-validate one lambda parameter while keeping the other two fixed.

    For every candidate value, `n_trials` placebo ATEs are simulated (the same
    trial seeds are reused for every candidate) and scored by their root mean
    square over the finite results. The candidate with the smallest score wins;
    ties go to the earliest grid entry.

    Parameters
    ----------
    Y_control:
        Control-only panel (N x T) used for placebo CV.
    n_treated_units:
        Number of placebo treated units to sample each trial.
    treated_periods:
        Number of placebo treated (post) periods (assumed final columns).
    fixed_lambdas:
        Tuple of two lambdas to hold fixed; interpretation depends on `lambda_cv`:
        - lambda_cv='unit': fixed_lambdas=(lambda_time, lambda_nn)
        - lambda_cv='time': fixed_lambdas=(lambda_unit, lambda_nn)
        - lambda_cv='nn'  : fixed_lambdas=(lambda_unit, lambda_time)
    lambda_grid:
        Grid of candidate values for the lambda being tuned.
        If None, uses np.arange(0, 2, 0.2), i.e. 0.0, 0.2, ..., 1.8.
    lambda_cv:
        Which lambda to tune: {'unit','time','nn'}.
    n_trials:
        Number of placebo trials per lambda.
    n_jobs:
        joblib parallelism. -1 uses all available cores.
    prefer:
        joblib backend preference. Use 'threads' by default for solver stability.
    random_seed:
        Seed for generating trial seeds (deterministic CV).
    solver, verbose:
        Passed through to TROP_TWFE_average.

    Returns
    -------
    float
        Lambda value that minimizes RMSE of placebo ATEs.

    Raises
    ------
    ValueError
        If the panel or counts are invalid, `lambda_cv` is unknown, the grid is
        empty or contains a negative value, `n_trials` is not positive, or
        `n_jobs` is neither -1 nor a positive integer.
    RuntimeError
        If every placebo trial for some candidate returns a non-finite ATE.
    """
    Y = np.asarray(Y_control, dtype=float)
    _validate_panel(Y, treated_periods, n_treated_units)

    if lambda_cv not in {"unit", "time", "nn"}:
        raise ValueError("lambda_cv must be one of {'unit','time','nn'}.")

    if lambda_grid is None:
        lambda_grid_list = _as_list(np.arange(0.0, 2.0, 0.2))
    else:
        lambda_grid_list = _as_list(lambda_grid)

    # Check the whole grid before any simulation: a bad value late in the grid
    # should not cost n_trials solver runs for every earlier candidate.
    if any(lamb < 0 for lamb in lambda_grid_list):
        raise ValueError("Lambda values must be nonnegative.")

    if n_trials <= 0:
        raise ValueError("n_trials must be positive.")
    if n_jobs == 0 or n_jobs < -1:
        raise ValueError("n_jobs must be -1 or a positive integer.")

    # Loop-invariant: convert the two held-fixed lambdas once.
    fixed_first = float(fixed_lambdas[0])
    fixed_second = float(fixed_lambdas[1])

    # One seed per trial, shared across all candidates so that scores are
    # compared on identical placebo assignments.
    base_rng = np.random.default_rng(random_seed)
    seeds = base_rng.integers(0, 2**32 - 1, size=n_trials, dtype=np.uint32)

    scores: List[float] = []

    for lamb in lambda_grid_list:
        if lambda_cv == "unit":
            lambda_unit, lambda_time, lambda_nn = lamb, fixed_first, fixed_second
        elif lambda_cv == "time":
            lambda_unit, lambda_time, lambda_nn = fixed_first, lamb, fixed_second
        else:  # 'nn'
            lambda_unit, lambda_time, lambda_nn = fixed_first, fixed_second, lamb

        ates = Parallel(n_jobs=n_jobs, prefer=prefer)(
            delayed(_simulate_ate)(
                int(seed),
                Y,
                n_treated_units,
                treated_periods,
                lambda_unit,
                lambda_time,
                lambda_nn,
                solver,
                verbose,
            )
            for seed in seeds
        )

        # Drop NaN/inf results so a few failed trials do not poison the score.
        ates_arr = np.asarray(ates, dtype=float)
        ates_arr = ates_arr[np.isfinite(ates_arr)]

        if ates_arr.size == 0:
            raise RuntimeError(
                f"All placebo trials failed or returned non-finite ATEs for lambda={lamb} "
                f"(lambda_cv='{lambda_cv}'). Consider changing solver/settings."
            )

        # The true placebo effect is zero, so the RMSE of the ATEs is the error.
        scores.append(float(np.sqrt(np.mean(ates_arr**2))))

    best_idx = int(np.argmin(scores))
    return float(lambda_grid_list[best_idx])
217
+
218
+
219
def TROP_cv_cycle(
    Y_control: ArrayLike,
    n_treated_units: int,
    treated_periods: int,
    unit_grid: Sequence[float],
    time_grid: Sequence[float],
    nn_grid: Sequence[float],
    lambdas_init: Optional[Tuple[float, float, float]] = None,
    *,
    max_iter: int = 50,
    n_trials: int = 200,
    n_jobs: int = -1,
    prefer: str = "threads",
    random_seed: int = 0,
    solver: Optional[str] = None,
    verbose: bool = False,
) -> Tuple[float, float, float]:
    """
    Coordinate-descent style cross-validation for (lambda_unit, lambda_time, lambda_nn).

    This routine alternates between optimizing lambda_unit, lambda_time, and lambda_nn
    (via `TROP_cv_single`) while holding the other two fixed, until it reaches a fixed
    point (no change in the selected lambdas) or until `max_iter` iterations are reached.
    Each update immediately uses the values chosen earlier in the same iteration.

    Parameters
    ----------
    Y_control:
        Control-only panel (N x T) used for placebo CV.
    n_treated_units:
        Number of placebo treated units to sample each trial.
    treated_periods:
        Number of placebo treated (post) periods (assumed final columns).
    unit_grid:
        Grid of candidate values for lambda_unit (unit-distance decay).
    time_grid:
        Grid of candidate values for lambda_time (time-distance decay).
    nn_grid:
        Grid of candidate values for lambda_nn (nuclear-norm penalty).
    lambdas_init:
        Optional initial values (lambda_unit, lambda_time, lambda_nn). If None, initializes
        each lambda to the mean of its corresponding grid.
    max_iter:
        Maximum number of coordinate-descent iterations.
    n_trials:
        Number of placebo trials per grid point in each coordinate update.
    n_jobs:
        joblib parallelism. -1 uses all available cores.
    prefer:
        joblib backend preference. Use 'threads' by default for solver stability.
    random_seed:
        Seed for generating trial seeds (deterministic CV).
    solver, verbose:
        Passed through to TROP_TWFE_average.

    Returns
    -------
    Tuple[float, float, float]
        (lambda_unit, lambda_time, lambda_nn) at the converged fixed point.

    Raises
    ------
    RuntimeError
        If the procedure does not converge to a fixed point within `max_iter`,
        or if it revisits an earlier non-fixed-point state (a cycle), in which
        case further iterations could only repeat the same sequence.
    """
    Y = np.asarray(Y_control, dtype=float)
    _validate_panel(Y, treated_periods, n_treated_units)

    unit_grid_list = _as_list(unit_grid)
    time_grid_list = _as_list(time_grid)
    nn_grid_list = _as_list(nn_grid)

    if lambdas_init is None:
        lambda_unit = float(np.mean(unit_grid_list))
        lambda_time = float(np.mean(time_grid_list))
        lambda_nn = float(np.mean(nn_grid_list))
    else:
        lambda_unit, lambda_time, lambda_nn = map(float, lambdas_init)

    def _tune(which: str, fixed: Tuple[float, float], grid: List[float]) -> float:
        # One coordinate update: tune `which` over `grid` with the other two held at `fixed`.
        return TROP_cv_single(
            Y, n_treated_units, treated_periods,
            fixed_lambdas=fixed,
            lambda_grid=grid,
            lambda_cv=which,
            n_trials=n_trials, n_jobs=n_jobs, prefer=prefer,
            random_seed=random_seed, solver=solver, verbose=verbose,
        )

    # Every update reuses the same data and random_seed, so (assuming the
    # underlying solver is deterministic) the next state is a pure function of
    # the current one. Remember visited states to spot cycles early.
    seen = {(lambda_unit, lambda_time, lambda_nn)}

    for _ in range(max_iter):
        old = (lambda_unit, lambda_time, lambda_nn)

        lambda_unit = _tune("unit", (lambda_time, lambda_nn), unit_grid_list)
        lambda_time = _tune("time", (lambda_unit, lambda_nn), time_grid_list)
        lambda_nn = _tune("nn", (lambda_unit, lambda_time), nn_grid_list)

        new = (lambda_unit, lambda_time, lambda_nn)
        if new == old:
            return new

        if new in seen:
            # Returning to an earlier state that was not a fixed point means the
            # updates loop forever; stop instead of spending the remaining
            # (expensive) iterations replaying the same cycle.
            raise RuntimeError(
                "TROP_cv_cycle did not converge (no fixed point): "
                f"coordinate updates entered a cycle at lambdas={new}."
            )
        seen.add(new)

    raise RuntimeError("TROP_cv_cycle did not converge (no fixed point) within max_iter.")
333
+
334
+
335
def TROP_cv_joint(
    Y_control: ArrayLike,
    n_treated_units: int,
    treated_periods: int,
    unit_grid: Sequence[float],
    time_grid: Sequence[float],
    nn_grid: Sequence[float],
    *,
    n_trials: int = 200,
    n_jobs: int = -1,
    prefer: str = "threads",
    random_seed: int = 0,
    solver: Optional[str] = None,
    verbose: bool = False,
) -> Tuple[float, float, float]:
    """
    Joint grid search over (lambda_unit, lambda_time, lambda_nn).

    Every triple in the Cartesian product of the three grids is scored by the
    root mean square of its finite placebo ATEs, using the same trial seeds for
    every triple. Triples whose trials all return non-finite ATEs are skipped.
    Ties go to the triple visited first (unit grid outermost, nn grid innermost).

    Parameters
    ----------
    Y_control:
        Control-only panel (N x T) used for placebo CV.
    n_treated_units:
        Number of placebo treated units to sample each trial.
    treated_periods:
        Number of placebo treated (post) periods (assumed final columns).
    unit_grid:
        Grid of candidate values for lambda_unit (unit-distance decay).
    time_grid:
        Grid of candidate values for lambda_time (time-distance decay).
    nn_grid:
        Grid of candidate values for lambda_nn (nuclear-norm penalty).
    n_trials:
        Number of placebo trials per (lambda_unit, lambda_time, lambda_nn) triple.
    n_jobs:
        joblib parallelism. -1 uses all available cores.
    prefer:
        joblib backend preference. Use 'threads' by default for solver stability.
    random_seed:
        Seed for generating trial seeds (deterministic CV).
    solver, verbose:
        Passed through to TROP_TWFE_average.

    Returns
    -------
    Tuple[float, float, float]
        (lambda_unit, lambda_time, lambda_nn) triple that minimizes the RMSE of placebo ATEs.

    Raises
    ------
    ValueError
        If the panel or counts are invalid, a grid is empty or contains a
        negative value, `n_trials` is not positive, or `n_jobs` is neither -1
        nor a positive integer.
    RuntimeError
        If no triple produced any finite placebo ATE.
    """
    # Function-scope import so the module's import block does not need to change.
    from itertools import product

    Y = np.asarray(Y_control, dtype=float)
    _validate_panel(Y, treated_periods, n_treated_units)

    unit_grid_list = _as_list(unit_grid)
    time_grid_list = _as_list(time_grid)
    nn_grid_list = _as_list(nn_grid)

    # Same argument checks as TROP_cv_single, done before any solver work, so
    # bad input is reported as such rather than as "all combinations failed".
    for grid_values in (unit_grid_list, time_grid_list, nn_grid_list):
        if any(value < 0 for value in grid_values):
            raise ValueError("Lambda values must be nonnegative.")
    if n_trials <= 0:
        raise ValueError("n_trials must be positive.")
    if n_jobs == 0 or n_jobs < -1:
        raise ValueError("n_jobs must be -1 or a positive integer.")

    # One seed per trial, shared by all triples so scores are comparable.
    base_rng = np.random.default_rng(random_seed)
    seeds = base_rng.integers(0, 2**32 - 1, size=n_trials, dtype=np.uint32)

    best_params: Optional[Tuple[float, float, float]] = None
    best_score: float = float("inf")

    # product() varies the last grid fastest, i.e. the same visiting order as
    # three nested loops over unit, time, nn.
    for lambda_unit, lambda_time, lambda_nn in product(
        unit_grid_list, time_grid_list, nn_grid_list
    ):
        ates = Parallel(n_jobs=n_jobs, prefer=prefer)(
            delayed(_simulate_ate)(
                int(seed),
                Y,
                n_treated_units,
                treated_periods,
                lambda_unit,
                lambda_time,
                lambda_nn,
                solver,
                verbose,
            )
            for seed in seeds
        )

        ates_arr = np.asarray(ates, dtype=float)
        ates_arr = ates_arr[np.isfinite(ates_arr)]
        if ates_arr.size == 0:
            continue  # skip invalid setting

        score = float(np.sqrt(np.mean(ates_arr**2)))
        # Strict '<' keeps the first triple on ties.
        if score < best_score:
            best_score = score
            best_params = (lambda_unit, lambda_time, lambda_nn)

    if best_params is None:
        raise RuntimeError("All parameter combinations failed during joint CV. Check solver/settings.")
    return best_params
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: trop
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: Triply Robust Panel (TROP) estimator: weighted TWFE with optional low-rank adjustment.
5
5
  Author: Susan Athey, Guido Imbens, Zhaonan Qu, Davide Viviano
6
6
  License-Expression: MIT
@@ -24,6 +24,7 @@ Description-Content-Type: text/markdown
24
24
  License-File: LICENSE
25
25
  Requires-Dist: numpy>=1.23
26
26
  Requires-Dist: cvxpy>=1.4
27
+ Requires-Dist: joblib>=1.2
27
28
  Requires-Dist: osqp>=0.6.5
28
29
  Requires-Dist: scs>=3.2.4
29
30
  Provides-Extra: dev
@@ -34,18 +35,15 @@ Dynamic: license-file
34
35
 
35
36
  # TROP: Triply Robust Panel Estimator
36
37
 
37
- This package provides a Python implementation of the **Triply Robust Panel (TROP)** estimator introduced in:
38
+ `trop` is a Python package implementing the **Triply Robust Panel (TROP)** estimator for average treatment effects (ATEs) in panel data. The core estimator is expressed as a weighted two-way fixed effects (TWFE) objective, with an optional low-rank regression adjustment via a nuclear-norm penalty.
39
+
40
+
41
+ Reference:
38
42
 
39
43
  > Susan Athey, Guido Imbens, Zhaonan Qu, Davide Viviano (2025).
40
44
  > *Triply Robust Panel Estimators*.
41
45
  > arXiv:2508.21536.
42
46
 
43
- The initial release (v0.1.0) exposes the function:
44
-
45
- - `TROP_TWFE_average(Y, W, treated_units, lambda_unit, lambda_time, lambda_nn, treated_periods=..., solver=...)`
46
-
47
- which estimates an average treatment effect `tau` in panel settings using a weighted TWFE objective with optional low-rank adjustment.
48
-
49
47
  ---
50
48
 
51
49
  ## Installation
@@ -0,0 +1,8 @@
1
+ trop/__init__.py,sha256=B94vrDZevg2l6ijN4lut7wo0MYTEtBTTZhqmMQtq7Qg,205
2
+ trop/cv.py,sha256=7tumpAaAiiK_F8nUUAcAwnecNAfc2XghftgJsNWVsAQ,14017
3
+ trop/estimator.py,sha256=FWMO39GbL6k3Vz5g1V7SpR6t5wP3N81V5gGSFIe65Xw,6001
4
+ trop-0.1.2.dist-info/licenses/LICENSE,sha256=VqjvjioQz04uLYBj4ye0x-_Ss77-WTIuEWWCW_awEz8,1065
5
+ trop-0.1.2.dist-info/METADATA,sha256=YSyeONhxn4JO_NZBKT1ubVa5J96BhlWGtmRi-GJ0rLk,1997
6
+ trop-0.1.2.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
7
+ trop-0.1.2.dist-info/top_level.txt,sha256=jaqQZFm3D5B4vPBAKZtXfEAYnpl9FKsNHqlM49kcwTI,5
8
+ trop-0.1.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,7 +0,0 @@
1
- trop/__init__.py,sha256=DW6eDmMyaY1tQ6wb-EP48fTNtkeUOuvqE5l88d8SnrA,73
2
- trop/estimator.py,sha256=FWMO39GbL6k3Vz5g1V7SpR6t5wP3N81V5gGSFIe65Xw,6001
3
- trop-0.1.1.dist-info/licenses/LICENSE,sha256=VqjvjioQz04uLYBj4ye0x-_Ss77-WTIuEWWCW_awEz8,1065
4
- trop-0.1.1.dist-info/METADATA,sha256=nM0XBF9nad4XGbpHiBmtLnPXs-mKxXM3sDxvt_ALl6Y,2069
5
- trop-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
- trop-0.1.1.dist-info/top_level.txt,sha256=jaqQZFm3D5B4vPBAKZtXfEAYnpl9FKsNHqlM49kcwTI,5
7
- trop-0.1.1.dist-info/RECORD,,