taxcalc 4.2.1__py3-none-any.whl → 4.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. taxcalc/__init__.py +1 -1
  2. taxcalc/assumptions/ASSUMPTIONS.md +53 -0
  3. taxcalc/assumptions/README.md +17 -0
  4. taxcalc/assumptions/economic_assumptions_template.json +77 -0
  5. taxcalc/calcfunctions.py +7 -4
  6. taxcalc/data.py +10 -5
  7. taxcalc/policy_current_law.json +2033 -184
  8. taxcalc/reforms/2017_law.json +125 -0
  9. taxcalc/reforms/2017_law.out.csv +10 -0
  10. taxcalc/reforms/ARPA.json +78 -0
  11. taxcalc/reforms/ARPA.out.csv +10 -0
  12. taxcalc/reforms/BrownKhanna.json +23 -0
  13. taxcalc/reforms/BrownKhanna.out.csv +10 -0
  14. taxcalc/reforms/CARES.json +40 -0
  15. taxcalc/reforms/CARES.out.csv +10 -0
  16. taxcalc/reforms/ConsolidatedAppropriationsAct2021.json +15 -0
  17. taxcalc/reforms/ConsolidatedAppropriationsAct2021.out.csv +10 -0
  18. taxcalc/reforms/Larson2019.json +36 -0
  19. taxcalc/reforms/Larson2019.out.csv +10 -0
  20. taxcalc/reforms/README.md +22 -0
  21. taxcalc/reforms/REFORMS.md +92 -0
  22. taxcalc/reforms/Renacci.json +61 -0
  23. taxcalc/reforms/Renacci.out.csv +10 -0
  24. taxcalc/reforms/SandersDeFazio.json +15 -0
  25. taxcalc/reforms/SandersDeFazio.out.csv +10 -0
  26. taxcalc/reforms/TCJA.json +160 -0
  27. taxcalc/reforms/TCJA.md +48 -0
  28. taxcalc/reforms/TCJA.out.csv +10 -0
  29. taxcalc/reforms/Trump2016.json +71 -0
  30. taxcalc/reforms/Trump2016.out.csv +10 -0
  31. taxcalc/reforms/Trump2017.json +51 -0
  32. taxcalc/reforms/Trump2017.out.csv +10 -0
  33. taxcalc/reforms/archive/Clinton2016.json +56 -0
  34. taxcalc/reforms/archive/RyanBrady.json +104 -0
  35. taxcalc/reforms/archive/TCJA_House.json +144 -0
  36. taxcalc/reforms/archive/TCJA_House_Amended.json +152 -0
  37. taxcalc/reforms/archive/TCJA_Reconciliation.json +187 -0
  38. taxcalc/reforms/archive/TCJA_Senate.json +116 -0
  39. taxcalc/reforms/archive/TCJA_Senate_111417.json +169 -0
  40. taxcalc/reforms/archive/TCJA_Senate_120117.json +174 -0
  41. taxcalc/reforms/cases.csv +10 -0
  42. taxcalc/reforms/clp.out.csv +10 -0
  43. taxcalc/reforms/ext.json +59 -0
  44. taxcalc/reforms/growfactors_ext.csv +65 -0
  45. taxcalc/reforms/ptaxes0.json +37 -0
  46. taxcalc/reforms/ptaxes0.out.csv +10 -0
  47. taxcalc/reforms/ptaxes1.json +21 -0
  48. taxcalc/reforms/ptaxes1.out.csv +10 -0
  49. taxcalc/reforms/ptaxes2.json +18 -0
  50. taxcalc/reforms/ptaxes2.out.csv +10 -0
  51. taxcalc/reforms/ptaxes3.json +28 -0
  52. taxcalc/reforms/ptaxes3.out.csv +10 -0
  53. taxcalc/reforms/rounding2022.json +153 -0
  54. taxcalc/reforms/rounding2022.out.csv +10 -0
  55. taxcalc/tests/benefits_expect.csv +169 -0
  56. taxcalc/tests/cmpi_cps_expect.txt +132 -0
  57. taxcalc/tests/cmpi_puf_expect.txt +132 -0
  58. taxcalc/tests/conftest.py +143 -0
  59. taxcalc/tests/cpscsv_agg_expect.csv +26 -0
  60. taxcalc/tests/puf_var_correl_coeffs_2016.csv +80 -0
  61. taxcalc/tests/puf_var_wght_means_by_year.csv +80 -0
  62. taxcalc/tests/pufcsv_agg_expect.csv +26 -0
  63. taxcalc/tests/pufcsv_mtr_expect.txt +63 -0
  64. taxcalc/tests/reforms.json +649 -0
  65. taxcalc/tests/reforms_expect.csv +65 -0
  66. taxcalc/tests/test_4package.py +67 -0
  67. taxcalc/tests/test_benefits.py +86 -0
  68. taxcalc/tests/test_calcfunctions.py +871 -0
  69. taxcalc/tests/test_calculator.py +1021 -0
  70. taxcalc/tests/test_compare.py +336 -0
  71. taxcalc/tests/test_compatible_data.py +338 -0
  72. taxcalc/tests/test_consumption.py +144 -0
  73. taxcalc/tests/test_cpscsv.py +163 -0
  74. taxcalc/tests/test_data.py +133 -0
  75. taxcalc/tests/test_decorators.py +332 -0
  76. taxcalc/tests/test_growdiff.py +102 -0
  77. taxcalc/tests/test_growfactors.py +94 -0
  78. taxcalc/tests/test_parameters.py +617 -0
  79. taxcalc/tests/test_policy.py +1575 -0
  80. taxcalc/tests/test_puf_var_stats.py +194 -0
  81. taxcalc/tests/test_pufcsv.py +385 -0
  82. taxcalc/tests/test_records.py +234 -0
  83. taxcalc/tests/test_reforms.py +385 -0
  84. taxcalc/tests/test_responses.py +41 -0
  85. taxcalc/tests/test_taxcalcio.py +755 -0
  86. taxcalc/tests/test_tmdcsv.py +38 -0
  87. taxcalc/tests/test_utils.py +792 -0
  88. taxcalc/tmd_growfactors.csv +54 -54
  89. taxcalc/tmd_weights.csv.gz +0 -0
  90. taxcalc/validation/CSV_INPUT_VARS.md +29 -0
  91. taxcalc/validation/CSV_OUTPUT_VARS.md +63 -0
  92. taxcalc/validation/README.md +68 -0
  93. taxcalc/validation/taxsim35/Differences_Explained.md +54 -0
  94. taxcalc/validation/taxsim35/README.md +139 -0
  95. taxcalc/validation/taxsim35/expected_differences/a17-taxdiffs-expect.csv +25 -0
  96. taxcalc/validation/taxsim35/expected_differences/a18-taxdiffs-expect.csv +25 -0
  97. taxcalc/validation/taxsim35/expected_differences/a19-taxdiffs-expect.csv +25 -0
  98. taxcalc/validation/taxsim35/expected_differences/a20-taxdiffs-expect.csv +25 -0
  99. taxcalc/validation/taxsim35/expected_differences/a21-taxdiffs-expect.csv +25 -0
  100. taxcalc/validation/taxsim35/expected_differences/b17-taxdiffs-expect.csv +25 -0
  101. taxcalc/validation/taxsim35/expected_differences/b18-taxdiffs-expect.csv +25 -0
  102. taxcalc/validation/taxsim35/expected_differences/b19-taxdiffs-expect.csv +25 -0
  103. taxcalc/validation/taxsim35/expected_differences/b20-taxdiffs-expect.csv +25 -0
  104. taxcalc/validation/taxsim35/expected_differences/b21-taxdiffs-expect.csv +25 -0
  105. taxcalc/validation/taxsim35/expected_differences/c17-taxdiffs-expect.csv +25 -0
  106. taxcalc/validation/taxsim35/expected_differences/c18-taxdiffs-expect.csv +25 -0
  107. taxcalc/validation/taxsim35/expected_differences/c19-taxdiffs-expect.csv +25 -0
  108. taxcalc/validation/taxsim35/input_setup.py +67 -0
  109. taxcalc/validation/taxsim35/main_comparison.py +183 -0
  110. taxcalc/validation/taxsim35/prepare_taxcalc_input.py +161 -0
  111. taxcalc/validation/taxsim35/process_taxcalc_output.py +140 -0
  112. taxcalc/validation/taxsim35/taxsim_emulation.json +49 -0
  113. taxcalc/validation/taxsim35/taxsim_input.py +321 -0
  114. taxcalc/validation/taxsim35/tc_sims.py +98 -0
  115. taxcalc/validation/taxsim35/tests_35.py +80 -0
  116. taxcalc/validation/tests_35.sh +13 -0
  117. {taxcalc-4.2.1.dist-info → taxcalc-4.2.2.dist-info}/METADATA +3 -4
  118. taxcalc-4.2.2.dist-info/RECORD +144 -0
  119. {taxcalc-4.2.1.dist-info → taxcalc-4.2.2.dist-info}/WHEEL +1 -1
  120. taxcalc-4.2.1.dist-info/RECORD +0 -34
  121. {taxcalc-4.2.1.dist-info → taxcalc-4.2.2.dist-info}/LICENSE +0 -0
  122. {taxcalc-4.2.1.dist-info → taxcalc-4.2.2.dist-info}/entry_points.txt +0 -0
  123. {taxcalc-4.2.1.dist-info → taxcalc-4.2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,792 @@
1
+ """
2
+ Tests of Tax-Calculator utility functions.
3
+ """
4
+ # CODING-STYLE CHECKS:
5
+ # pycodestyle test_utils.py
6
+ # pylint --disable=locally-disabled test_utils.py
7
+ #
8
+ # pylint: disable=missing-docstring
9
+
10
+ import os
11
+ import math
12
+ import random
13
+ import numpy as np
14
+ import pandas as pd
15
+ import pytest
16
+ # pylint: disable=import-error
17
+ from taxcalc import Policy, Records, Calculator
18
+ from taxcalc.utils import (DIST_VARIABLES,
19
+ DIST_TABLE_COLUMNS, DIST_TABLE_LABELS,
20
+ DIFF_VARIABLES,
21
+ DIFF_TABLE_COLUMNS, DIFF_TABLE_LABELS,
22
+ SOI_AGI_BINS,
23
+ create_difference_table,
24
+ weighted_sum, weighted_mean,
25
+ wage_weighted, agi_weighted,
26
+ expanded_income_weighted,
27
+ add_income_table_row_variable,
28
+ add_quantile_table_row_variable,
29
+ mtr_graph_data, atr_graph_data,
30
+ xtr_graph_plot, write_graph_file,
31
+ read_egg_csv, read_egg_json, delete_file,
32
+ bootstrap_se_ci,
33
+ certainty_equivalent,
34
+ ce_aftertax_expanded_income)
35
+
36
+
37
# Five rows of [value, weight, label]; the tests below load them into a
# DataFrame whose second column is named 's006' and third is named 'label'.
DATA = [
    [1.0, 2, 'a'],
    [-1.0, 4, 'a'],
    [3.0, 6, 'a'],
    [2.0, 4, 'b'],
    [3.0, 6, 'b'],
]

# Three all-float rows; the *_weighted tests below load them into a
# DataFrame with columns 'var', 's006' and an income column.
WEIGHT_DATA = [
    [1.0, 2.0, 10.0],
    [2.0, 4.0, 20.0],
    [3.0, 6.0, 30.0],
]

# Same layout as DATA, with values of +/-1e-10 added to each label group.
DATA_FLOAT = [
    [1.0, 2, 'a'],
    [-1.0, 4, 'a'],
    [1e-10, 3, 'a'],
    [-1e-10, 1, 'a'],
    [3.0, 6, 'a'],
    [2.0, 4, 'b'],
    [1e-10, 3, 'b'],
    [-1e-10, 1, 'b'],
    [3.0, 6, 'b'],
]
56
+
57
+
58
def test_validity_of_name_lists():
    """
    Check the DIST_* name lists against each other and against the
    variable names known to the Records class.
    """
    # column names and column labels must pair up one-to-one
    assert len(DIST_TABLE_COLUMNS) == len(DIST_TABLE_LABELS)
    # every distribution variable is a calculated variable, s006 or XTOT
    varinfo = Records(data=None)
    allowed_vars = varinfo.CALCULATED_VARS | {'s006', 'XTOT'}
    assert set(DIST_VARIABLES).issubset(allowed_vars)
    # the only table columns that are not variables are the count columns
    count_columns = {'count',
                     'count_StandardDed',
                     'count_ItemDed',
                     'count_AMT'}
    assert set(DIST_TABLE_COLUMNS) - set(DIST_VARIABLES) == count_columns
68
+
69
+
70
def _mismatched_columns(table, label, expected_by_column):
    """
    Compare columns of table with expected values and report mismatches.

    Parameters
    ----------
    table: DataFrame whose columns are being checked
    label: short text printed ahead of the column name on a mismatch
    expected_by_column: dict mapping column name to list of expected values

    Returns
    -------
    list of 'label column' strings, one for each column whose values are
    not all within 0.1 (absolute) of the expected values

    For each mismatching column the actual values are printed one per line
    with a trailing comma, so they can be pasted into the test as new
    expected values.
    """
    mismatched = []
    for tabcol, expected in expected_by_column.items():
        actual = table[tabcol].values.astype('float')
        # equal_nan=True only matters where expected itself contains NaN
        if np.allclose(actual, expected,
                       atol=0.1, rtol=0.0, equal_nan=True):
            continue
        mismatched.append(f'{label} {tabcol}')
        print(label, tabcol)
        for val in actual:
            print(f'{val:.1f},')
    return mismatched


def test_create_tables(cps_subsample):
    """
    Compare selected columns of difference and distribution tables with
    expected values, using a reform that sets II_rt1 to 0.15 from 2013.

    All columns are checked before the test fails, so one run reports
    every mismatching column.
    """
    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {'II_rt1': {2013: 0.15}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    failed = []

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'standard_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    failed += _mismatched_columns(diff, 'diff xbin', {
        'pc_aftertaxinc': [0.0, np.nan, -0.1, -0.5, -0.7, -0.7, -0.8,
                           -0.7, -0.7, -0.7, -0.3, -0.1, -0.0, -0.6],
    })

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    failed += _mismatched_columns(diff, 'diff xdec', {
        'tot_change': [0.0, 0.0, 0.0, 0.6, 2.9, 3.5, 4.4, 6.1,
                       6.5, 8.7, 12.0, 13.3, 58.0, 7.7, 4.8, 0.8],
        'share_of_change': [0.0, 0.0, 0.0, 1.0, 5.0, 6.0, 7.6, 10.6,
                            11.1, 15.1, 20.7, 22.9, 100.0, 13.2, 8.3, 1.4],
        'pc_aftertaxinc': [np.nan, 0.0, -0.0, -0.3, -0.8, -0.7, -0.7, -0.8,
                           -0.7, -0.7, -0.7, -0.3, -0.6, -0.7, -0.4, -0.1],
    })

    # test creating various distribution tables

    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    failed += _mismatched_columns(dist, 'dist xdec', {
        'iitax': [0.0, 0.0, -0.4, -4.1, -5.9, 8.0, 16.9, 29.0,
                  27.0, 71.4, 153.4, 910.1, 1205.5, 159.4, 268.1, 482.7],
        'count_ItemDed': [0.0, 0.0, 0.0, 1.1, 2.6, 3.9, 4.7, 6.3,
                          6.5, 7.4, 11.3, 16.3, 60.3, 7.4, 7.2, 1.7],
        'expanded_income': [0.0, -1.4, 30.7, 209.8, 388.8, 541.2,
                            679.1, 847.6, 1097.1, 1430.7, 1978.3,
                            5007.6, 12209.4, 1410.9, 1765.5, 1831.2],
        'aftertax_income': [0.0, -1.4, 29.0, 195.5, 363.0, 491.0,
                            612.2, 747.1, 980.6, 1248.0, 1630.2,
                            3741.3, 10036.6, 1100.9, 1339.0, 1301.4],
    })

    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    assert isinstance(dist, pd.DataFrame)
    failed += _mismatched_columns(dist, 'dist xbin', {
        'iitax': [0.0, 0.0, -1.3, -7.6, -1.2, 20.7, 26.3,
                  47.2, 95.5, 321.9, 324.0, 64.8, 315.2, 1205.5],
        'count_ItemDed': [0.0, 0.0, 0.2, 1.8, 3.6, 5.9, 5.7,
                          10.2, 8.1, 17.7, 6.7, 0.3, 0.1, 60.3],
    })

    assert not failed, f'table columns differ from expected values: {failed}'
337
+
338
+
339
def test_diff_count_precision():
    """
    Estimate bootstrap standard error and confidence interval for count
    statistics ('tax_cut' and 'tax_inc') in difference table generated
    using puf.csv input data taking no account of tbi privacy fuzzing and
    assuming all filing units in each bin have the same weight.  These
    assumptions imply that the estimates produced here are likely to
    over-estimate the precision of the count statistics.

    Background information on unweighted number of filing units by bin:

    DECILE BINS:
    0   16268
    1   14897
    2   13620
    3   15760
    4   16426
    5   18070
    6   18348
    7   19352
    8   21051
    9   61733 <--- largest unweighted bin count
    A  215525

    STANDARD BINS:
    0    7081 <--- negative income bin was dropped in TaxBrain display
    1   19355
    2   22722
    3   20098
    4   17088
    5   14515
    6   24760
    7   15875
    8   25225
    9   15123
    10  10570 <--- smallest unweighted bin count
    11  23113 <--- second largest unweighted WEBAPP bin count
    A  215525

    Background information on Trump2017.json reform used in TaxBrain run 16649:

    STANDARD bin 10 ($500-1000 thousand) has weighted count of 1179 thousand;
    weighted count of units with tax increase is 32 thousand.

    So, the mean weight for all units in STANDARD bin 10 is 111.5421 and the
    unweighted number with a tax increase is 287 assuming all units in that
    bin have the same weight.  (Note that 287 * 111.5421 is about 32,012.58,
    which rounds to the 32 thousand shown in the TaxBrain difference table.)

    STANDARD bin 11 ($1000+ thousand) has weighted count of 636 thousand;
    weighted count of units with tax increase is 27 thousand.

    So, the mean weight for all units in STANDARD bin 11 is about 27.517 and
    the unweighted number with a tax increase is 981 assuming all units in
    that bin have the same weight.  (Note that 981 * 27.517 is about 26,994.18,
    which rounds to the 27 thousand shown in the TaxBrain difference table.)
    """
    dump = False  # setting to True implies results printed and test fails
    seed = 123456789
    bs_samples = 1000
    alpha = 0.025  # implies 95% confidence interval
    report = '{}EST={:.1f} B={} alpha={:.3f} se={:.2f} ci=[ {:.2f} , {:.2f} ]'
    stat_keys = ('se', 'cilo', 'cihi')

    # compute stderr and confidence interval for STANDARD bin 10 increase count
    num_units, num_increase = 10570, 287
    weights = np.array([111.5421] * num_increase +
                       [0.0] * (num_units - num_increase))
    assert weights.size == num_units
    assert np.count_nonzero(weights > 0) == num_increase
    estimate = np.sum(weights) * 1e-3  # weighted count in thousands
    assert abs((estimate / 32) - 1) < 0.0005
    bsd = bootstrap_se_ci(weights, seed, bs_samples, np.sum, alpha)
    stderr, cilo, cihi = (bsd[key] * 1e-3 for key in stat_keys)
    if dump:
        print(report.format('STANDARD-BIN10: ', estimate,
                            bs_samples, alpha, stderr, cilo, cihi))
    assert abs((stderr / 1.90) - 1) < 0.0008
    # NOTE: a se of 1.90 thousand implies that when comparing the difference
    #       in the weighted number of filing units in STANDARD bin 10 with a
    #       tax increase, the difference statistic has a bigger se (because
    #       the variance of the difference is the sum of the variances of the
    #       two point estimates).  So, in STANDARD bin 10 if the point
    #       estimates both had se = 1.90, then the difference in the point
    #       estimates has a se = 2.687.  This means that the difference
    #       would have to be over 5 thousand in order for there to be high
    #       confidence that the difference was different from zero in a
    #       statistically significant manner.
    #       Or put a different way, a difference of 1 thousand cannot be
    #       accurately detected while a difference of 10 thousand can be
    #       accurately detected.
    assert abs((cilo / 28.33) - 1) < 0.0012
    assert abs((cihi / 35.81) - 1) < 0.0012

    # compute stderr and confidence interval for STANDARD bin 11 increase count
    num_units, num_increase = 23113, 981
    weights = np.array([27.517] * num_increase +
                       [0.0] * (num_units - num_increase))
    assert weights.size == num_units
    assert np.count_nonzero(weights > 0) == num_increase
    estimate = np.sum(weights) * 1e-3  # weighted count in thousands
    assert abs((estimate / 27) - 1) < 0.0005
    bsd = bootstrap_se_ci(weights, seed, bs_samples, np.sum, alpha)
    stderr, cilo, cihi = (bsd[key] * 1e-3 for key in stat_keys)
    if dump:
        print(report.format('STANDARD-BIN11: ', estimate,
                            bs_samples, alpha, stderr, cilo, cihi))
    assert abs((stderr / 0.85) - 1) < 0.0040
    # NOTE: a se of 0.85 thousand implies that when comparing the difference
    #       in the weighted number of filing units in STANDARD bin 11 with a
    #       tax increase, the difference statistic has a bigger se (because
    #       the variance of the difference is the sum of the variances of the
    #       two point estimates).  So, in STANDARD bin 11 if point estimates
    #       both had se = 0.85, then the difference in the point estimates
    #       has a se = 1.20.  This means that the difference would have to be
    #       over 2.5 thousand in order for there to be high confidence that the
    #       difference was different from zero in a statistically significant
    #       manner.
    #       Or put a different way, a difference of 1 thousand cannot be
    #       accurately detected while a difference of 10 thousand can be
    #       accurately detected.
    assert abs((cilo / 25.37) - 1) < 0.0012
    assert abs((cihi / 28.65) - 1) < 0.0012

    # fail if doing dump
    assert not dump
468
+
469
+
470
def test_weighted_mean():
    """
    Apply weighted_mean to 'tax_diff' within each 'label' group of DATA.
    """
    frame = pd.DataFrame(DATA, columns=['tax_diff', 's006', 'label'])
    actual = frame.groupby('label').apply(
        weighted_mean, 'tax_diff', include_groups=False
    )
    # expected value per label is sum(tax_diff * s006) / sum(s006):
    #   a: (2 - 4 + 18) / 12    b: (8 + 18) / 10
    expected = pd.Series([16.0 / 12.0, 26.0 / 10.0],
                         index=pd.Index(['a', 'b'], name='label'))
    pd.testing.assert_series_equal(expected, actual)
477
+
478
+
479
def test_wage_weighted():
    """
    Check wage_weighted() result for 'var' when WEIGHT_DATA supplies the
    'var', 's006' and 'e00200' columns.
    """
    column_names = ['var', 's006', 'e00200']
    frame = pd.DataFrame(WEIGHT_DATA, columns=column_names)
    result = wage_weighted(frame, 'var')
    # 2.5714 is 720 / 280 = sum(var * s006 * e00200) / sum(s006 * e00200)
    assert round(result, 4) == 2.5714
483
+
484
+
485
def test_agi_weighted():
    """
    Check agi_weighted() result for 'var' when WEIGHT_DATA supplies the
    'var', 's006' and 'c00100' columns.
    """
    column_names = ['var', 's006', 'c00100']
    frame = pd.DataFrame(WEIGHT_DATA, columns=column_names)
    result = agi_weighted(frame, 'var')
    # 2.5714 is 720 / 280 = sum(var * s006 * c00100) / sum(s006 * c00100)
    assert round(result, 4) == 2.5714
489
+
490
+
491
def test_expanded_income_weighted():
    """
    Check expanded_income_weighted() result for 'var' when WEIGHT_DATA
    supplies the 'var', 's006' and 'expanded_income' columns.
    """
    column_names = ['var', 's006', 'expanded_income']
    frame = pd.DataFrame(WEIGHT_DATA, columns=column_names)
    result = expanded_income_weighted(frame, 'var')
    # 2.5714 is 720 / 280, the third-column-and-s006 weighted mean of var
    assert round(result, 4) == 2.5714
496
+
497
+
498
def test_weighted_sum():
    """
    Apply weighted_sum to 'tax_diff' within each 'label' group of DATA.
    """
    frame = pd.DataFrame(DATA, columns=['tax_diff', 's006', 'label'])
    actual = frame.groupby('label').apply(
        weighted_sum, 'tax_diff', include_groups=False
    )
    # expected value per label is sum(tax_diff * s006):
    #   a: 2 - 4 + 18 = 16    b: 8 + 18 = 26
    expected = pd.Series([16.0, 26.0],
                         index=pd.Index(['a', 'b'], name='label'))
    pd.testing.assert_series_equal(expected, actual)
505
+
506
+
507
# tolerance used when comparing an interval edge with a bin edge
EPSILON = 1e-5


def test_add_income_trow_var():
    """
    Check that table_row groups added by add_income_table_row_variable()
    are left-closed intervals whose right edges follow SOI_AGI_BINS.
    """
    incomes = np.arange(1, 1e6, 5000)
    frame = pd.DataFrame(incomes, columns=['expanded_income'])
    frame = add_income_table_row_variable(frame, 'expanded_income',
                                          SOI_AGI_BINS)
    groups = frame.groupby('table_row', observed=False)
    # the first group's right edge is SOI_AGI_BINS[1], the second's [2], ...
    for edge_index, (interval, _) in enumerate(groups, start=1):
        assert interval.closed == 'left'
        assert abs(interval.right - SOI_AGI_BINS[edge_index]) < EPSILON
520
+
521
+
522
def test_add_quantile_trow_var():
    """
    Exercise add_quantile_table_row_variable() with 100 quantiles.
    """
    frame = pd.DataFrame(DATA, columns=['expanded_income', 's006', 'label'])
    # without income weighting, every table_row label lies in 1..100
    binned = add_quantile_table_row_variable(frame, 'expanded_income', 100,
                                             decile_details=False,
                                             weight_by_income_measure=False)
    valid_labels = set(range(1, 101))
    assert all(lab in valid_labels for lab in binned['table_row'].unique())
    # the default call also adds a table_row column
    binned = add_quantile_table_row_variable(frame, 'expanded_income', 100,
                                             decile_details=False)
    assert 'table_row' in binned
    # asking for decile details with 100 quantiles raises ValueError
    with pytest.raises(ValueError):
        binned = add_quantile_table_row_variable(frame, 'expanded_income',
                                                 100, decile_details=True)
537
+
538
+
539
def test_dist_table_sum_row(cps_subsample):
    """
    Check that the ALL row of a distribution table is the same for each
    kind of row grouping.
    """
    def all_row(table):
        # ALL row of table as a float array
        return table.loc['ALL'].values.astype('float')

    recs = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=Policy(), records=recs)
    calc.calc_all()
    # create distribution tables and compare the ALL row contents
    bins_tbl, _ = calc.distribution_tables(None, 'standard_income_bins')
    soi_tbl, _ = calc.distribution_tables(None, 'soi_agi_bins')
    decile_tbl, _ = calc.distribution_tables(None, 'weighted_deciles')
    pop_tbl, _ = calc.distribution_tables(None, 'weighted_deciles',
                                          pop_quantiles=True)
    assert np.allclose(all_row(bins_tbl), all_row(soi_tbl))
    assert np.allclose(all_row(bins_tbl), all_row(decile_tbl))
    # make sure population count is larger than filing-unit count
    assert pop_tbl.at['ALL', 'count'] > bins_tbl.at['ALL', 'count']
    # after copying the filing-unit counts into the population table, the
    # rest of its ALL row must equal the filing-unit table's ALL row
    count_columns = ['count', 'count_StandardDed',
                     'count_ItemDed', 'count_AMT']
    for col in count_columns:
        pop_tbl.at['ALL', col] = bins_tbl.at['ALL', col]
    assert np.allclose(all_row(bins_tbl), all_row(pop_tbl))
    # make sure population table has same ALL tax liabilities as diagnostic tbl
    diag_tbl = calc.diagnostic_table(1)
    year = calc.current_year
    table_taxes = [pop_tbl.at['ALL', 'iitax'],
                   pop_tbl.at['ALL', 'payrolltax']]
    diag_taxes = [diag_tbl.at['Ind Income Tax ($b)', year],
                  diag_tbl.at['Payroll Taxes ($b)', year]]
    assert np.allclose(table_taxes, diag_taxes)
566
+
567
+
568
def test_diff_table_sum_row(cps_subsample):
    """
    Check that the ALL row of an iitax difference table is the same for
    each kind of row grouping, using a reform that sets II_rt4 to 0.56.
    """
    def all_row(table):
        # ALL row of table as a float array
        return table.loc['ALL'].values.astype('float')

    recs = Records.cps_constructor(data=cps_subsample)
    # create a current-law Policy object and Calculator calc1
    pol = Policy()
    calc1 = Calculator(policy=pol, records=recs)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator calc2
    pol.implement_reform({'II_rt4': {2013: 0.56}})
    calc2 = Calculator(policy=pol, records=recs)
    calc2.calc_all()
    # create difference tables and compare their ALL row content
    base_vars = calc1.dataframe(DIFF_VARIABLES)
    reform_vars = calc2.dataframe(DIFF_VARIABLES)
    bins_tbl = create_difference_table(base_vars, reform_vars,
                                       'standard_income_bins', 'iitax')
    soi_tbl = create_difference_table(base_vars, reform_vars,
                                      'soi_agi_bins', 'iitax')
    decile_tbl = create_difference_table(base_vars, reform_vars,
                                         'weighted_deciles', 'iitax',
                                         pop_quantiles=False)
    pop_tbl = create_difference_table(base_vars, reform_vars,
                                      'weighted_deciles', 'iitax',
                                      pop_quantiles=True)
    assert np.allclose(all_row(bins_tbl), all_row(soi_tbl))
    assert np.allclose(all_row(bins_tbl), all_row(decile_tbl))
    # make sure population count is larger than filing-unit count
    assert pop_tbl.at['ALL', 'count'] > bins_tbl.at['ALL', 'count']
595
+
596
+
597
def test_mtr_graph_data(cps_subsample):
    """
    Check that mtr_graph_data() rejects invalid arguments with ValueError
    and returns a dict for a valid wage-based call.
    """
    recs = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=Policy(), records=recs)
    year = calc.current_year
    # each of these keyword-argument sets must raise ValueError
    invalid_kwargs = [
        {'mars': 'bad', 'income_measure': 'agi', 'dollar_weighting': True},
        {'mars': 0, 'income_measure': 'expanded_income',
         'dollar_weighting': True},
        {'mars': []},
        {'mars': 'ALL', 'mtr_variable': 'e00200s'},
        {'mtr_measure': 'badtax'},
        {'income_measure': 'badincome'},
    ]
    for kwargs in invalid_kwargs:
        with pytest.raises(ValueError):
            mtr_graph_data(None, year, **kwargs)
    # valid call: same constant 0.20 rate in both mtr columns, MARS==1 only
    flat_mtr = 0.20 * np.ones_like(cps_subsample['e00200'])
    vdf = calc.dataframe(['s006', 'MARS', 'e00200'])
    vdf['mtr1'] = flat_mtr
    vdf['mtr2'] = flat_mtr
    singles = vdf[vdf['MARS'] == 1]
    gdata = mtr_graph_data(singles, year, mars=1,
                           mtr_wrt_full_compen=True,
                           income_measure='wages',
                           dollar_weighting=True)
    assert isinstance(gdata, dict)
627
+
628
+
629
def test_atr_graph_data(cps_subsample):
    """
    Check that atr_graph_data() rejects invalid arguments with ValueError
    and returns a dict for each valid call.

    Every valid call's result is checked, not just the last one.
    """
    pol = Policy()
    rec = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=pol, records=rec)
    year = calc.current_year
    # each of these keyword-argument sets must raise ValueError
    invalid_kwargs = [
        {'mars': 'bad'},
        {'mars': 0},
        {'mars': []},
        {'atr_measure': 'badtax'},
    ]
    for kwargs in invalid_kwargs:
        with pytest.raises(ValueError):
            atr_graph_data(None, year, **kwargs)
    # valid calls: same constant 0.20 value in both tax columns
    calc.calc_all()
    vdf = calc.dataframe(['s006', 'MARS', 'expanded_income'])
    tax = 0.20 * np.ones_like(vdf['expanded_income'])
    vdf['tax1'] = tax
    vdf['tax2'] = tax
    valid_kwargs = [
        {'mars': 1, 'atr_measure': 'combined'},
        {'atr_measure': 'itax'},
        {'atr_measure': 'ptax'},
    ]
    for kwargs in valid_kwargs:
        gdata = atr_graph_data(vdf, year, **kwargs)
        assert isinstance(gdata, dict)
651
+
652
+
653
def test_xtr_graph_plot(cps_subsample):
    """
    Check that xtr_graph_plot() returns a truthy plot for AGI-based graph
    data and that mtr_graph_data() returns a dict when expanded_income is
    the income measure.
    """
    recs = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=Policy(), records=recs)
    year = calc.current_year
    flat_mtr = 0.20 * np.ones_like(cps_subsample['e00200'])
    # payroll-tax graph data by AGI, then plotted
    agi_vdf = calc.dataframe(['s006', 'MARS', 'c00100'])
    agi_vdf['mtr1'] = flat_mtr
    agi_vdf['mtr2'] = flat_mtr
    agi_gdata = mtr_graph_data(agi_vdf, year, mtr_measure='ptax',
                               income_measure='agi',
                               dollar_weighting=False)
    assert xtr_graph_plot(agi_gdata)
    # income-tax graph data by expanded income (not plotted)
    exp_vdf = calc.dataframe(['s006', 'expanded_income'])
    exp_vdf['mtr1'] = flat_mtr
    exp_vdf['mtr2'] = flat_mtr
    exp_gdata = mtr_graph_data(exp_vdf, year, mtr_measure='itax',
                               alt_e00200p_text='Taxpayer Earnings',
                               income_measure='expanded_income',
                               dollar_weighting=False)
    assert isinstance(exp_gdata, dict)
673
+
674
+
675
def temporary_filename(suffix=''):
    """
    Return a file name made of 'tmp', a random eight-digit number, and the
    specified suffix.  No file is created.
    """
    number = random.randint(10000000, 99999999)
    return f'tmp{number}{suffix}'
678
+
679
+
680
def test_write_graph_file(cps_subsample):
    """
    Check that write_graph_file() can write a plot to a temporary HTML
    file without raising an exception.

    The temporary file is removed, if it exists, whether or not the write
    succeeds.
    """
    recs = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=Policy(), records=recs)
    mtr = 0.20 * np.ones_like(cps_subsample['e00200'])
    vdf = calc.dataframe(['s006', 'e00200', 'c00100'])
    vdf['mtr1'] = mtr
    vdf['mtr2'] = mtr
    gdata = mtr_graph_data(vdf, calc.current_year, mtr_measure='ptax',
                           alt_e00200p_text='Taxpayer Earnings',
                           income_measure='agi',
                           dollar_weighting=False)
    gplot = xtr_graph_plot(gdata)
    assert gplot
    htmlfname = temporary_filename(suffix='.html')
    try:
        # an exception raised here propagates, so the test fails showing
        # the real error rather than an unrelated assertion message
        write_graph_file(gplot, htmlfname, 'title')
    finally:
        # best-effort cleanup on both the success and the failure path
        if os.path.isfile(htmlfname):
            try:
                os.remove(htmlfname)
            except OSError:
                pass  # sometimes we can't remove a generated temporary file
709
+
710
+
711
def test_ce_aftertax_income(cps_subsample):
    """
    Exercise certainty_equivalent() and ce_aftertax_expanded_income(),
    using a reform that sets II_em to 1000 from 2019 and results for 2020.
    """
    cmin = 1000
    # test certainty_equivalent() function with con>cmin
    con = 5000
    assert con == round(certainty_equivalent(con, 0, cmin), 6)
    reduced = certainty_equivalent((math.log(con) - 0.1), 1, cmin)
    assert con > round(reduced, 6)
    # test certainty_equivalent() function with con<cmin
    con = 500
    assert con == round(certainty_equivalent(con, 0, cmin), 6)
    # test with require_no_agg_tax_change equal to False
    cyr = 2020
    rec = Records.cps_constructor(data=cps_subsample)
    # specify calc1 and calc_all() for cyr
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.advance_to_year(cyr)
    calc1.calc_all()
    # specify calc2 and calc_all() for cyr
    pol.implement_reform({'II_em': {2019: 1000}})
    calc2 = Calculator(policy=pol, records=rec)
    calc2.advance_to_year(cyr)
    calc2.calc_all()
    ce_vars = ['s006', 'combined', 'expanded_income']
    df1 = calc1.dataframe(ce_vars)
    df2 = calc2.dataframe(ce_vars)
    cedict = ce_aftertax_expanded_income(df1, df2,
                                         require_no_agg_tax_change=False)
    assert isinstance(cedict, dict)
    # NOTE(review): the results of the next two np.allclose() calls are
    # discarded, so these comparisons never fail the test; confirm the
    # expected values are current before turning them into assertions.
    np.allclose(cedict['ceeu1'], [55641, 27167, 5726, 2229, 1565],
                atol=0.5, rtol=0.0)
    np.allclose(cedict['ceeu2'], [54629, 26698, 5710, 2229, 1565],
                atol=0.5, rtol=0.0)
    # test with require_no_agg_tax_change equal to True
    with pytest.raises(ValueError):
        ce_aftertax_expanded_income(df1, df2, require_no_agg_tax_change=True)
    # test with require_no_agg_tax_change equal to True and custom_params
    custom = {'crra_list': [0, 2], 'cmin_value': 2000}
    with pytest.raises(ValueError):
        ce_aftertax_expanded_income(df1, df2, require_no_agg_tax_change=True,
                                    custom_params=custom)
752
+
753
+
754
def test_read_egg_csv():
    """
    Check that read_egg_csv() raises ValueError for a bad file name.
    """
    bad_name = 'bad_filename'
    with pytest.raises(ValueError):
        read_egg_csv(bad_name)
757
+
758
+
759
def test_read_egg_json():
    """
    Check that read_egg_json() raises ValueError for a bad file name.
    """
    bad_name = 'bad_filename'
    with pytest.raises(ValueError):
        read_egg_json(bad_name)
762
+
763
+
764
def test_create_delete_temp_file():
    """
    Check that delete_file() removes a file whose name was generated by
    temporary_filename().
    """
    fname = temporary_filename()
    # explicit encoding keeps the write independent of the platform locale
    with open(fname, 'w', encoding='utf-8') as tmpfile:
        tmpfile.write('any content will do')
    assert os.path.isfile(fname) is True
    delete_file(fname)
    assert os.path.isfile(fname) is False
772
+
773
+
774
def test_bootstrap_se_ci():
    """
    Compare bootstrap_se_ci() results for the mean with published values.
    """
    # Use treated mouse data from Table 2.1 and
    # results from Table 2.2 and Table 13.1 in
    # Bradley Efron and Robert Tibshirani,
    # "An Introduction to the Bootstrap"
    # (Chapman & Hall, 1993).
    mouse_data = np.array([94, 197, 16, 38, 99, 141, 23], dtype=np.float64)
    # this is just rounding error
    assert abs(np.mean(mouse_data) - 86.86) < 0.005
    bsd = bootstrap_se_ci(mouse_data, 123456789, 1000, np.mean, alpha=0.025)
    # following comparisons are less precise because of r.n. stream differences
    published = (('se', 23.02, 0.02),
                 ('cilo', 45.9, 0.02),
                 ('cihi', 135.4, 0.03))
    for key, value, rel_tol in published:
        assert abs(bsd[key] / value - 1) < rel_tol
787
+
788
+
789
def test_table_columns_labels():
    """
    Check that each table's column list and label list have equal length.
    """
    column_label_pairs = (
        (DIST_TABLE_COLUMNS, DIST_TABLE_LABELS),
        (DIFF_TABLE_COLUMNS, DIFF_TABLE_LABELS),
    )
    for columns, labels in column_label_pairs:
        assert len(columns) == len(labels)