taxcalc 5.2.0__py3-none-any.whl → 6.0.0__py3-none-any.whl
This diff compares the content of publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the packages exactly as they appear in those registries.
- taxcalc/__init__.py +3 -3
- taxcalc/calcfunctions.py +2 -2
- taxcalc/calculator.py +4 -4
- taxcalc/cli/tc.py +16 -19
- taxcalc/data.py +2 -3
- taxcalc/decorators.py +9 -8
- taxcalc/growfactors.py +2 -1
- taxcalc/policy.py +6 -23
- taxcalc/policy_current_law.json +31 -631
- taxcalc/records.py +78 -82
- taxcalc/records_variables.json +106 -106
- taxcalc/reforms/ARPA.out.csv +9 -9
- taxcalc/taxcalcio.py +101 -77
- taxcalc/tests/conftest.py +20 -15
- taxcalc/tests/puf_var_correl_coeffs_2016.csv +24 -24
- taxcalc/tests/puf_var_wght_means_by_year.csv +11 -11
- taxcalc/tests/pufcsv_agg_expect.csv +20 -20
- taxcalc/tests/pufcsv_mtr_expect.txt +21 -21
- taxcalc/tests/reforms.json +3 -1
- taxcalc/tests/reforms_expect.csv +54 -54
- taxcalc/tests/test_4package.py +8 -9
- taxcalc/tests/test_calculator.py +55 -18
- taxcalc/tests/test_consumption.py +2 -2
- taxcalc/tests/test_cpscsv.py +2 -24
- taxcalc/tests/test_data.py +11 -3
- taxcalc/tests/test_decorators.py +57 -52
- taxcalc/tests/test_growdiff.py +2 -2
- taxcalc/tests/test_parameters.py +101 -53
- taxcalc/tests/test_policy.py +154 -154
- taxcalc/tests/test_records.py +144 -9
- taxcalc/tests/test_reforms.py +104 -104
- taxcalc/tests/test_taxcalcio.py +13 -62
- taxcalc/utils.py +3 -3
- {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/METADATA +3 -6
- {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/RECORD +39 -46
- taxcalc/puf_ratios.csv +0 -26
- taxcalc/puf_weights.csv.gz +0 -0
- taxcalc/reforms/clp.out.csv +0 -10
- taxcalc/tests/test_compare.py +0 -330
- taxcalc/tests/test_compatible_data.py +0 -334
- taxcalc/tests/test_puf_var_stats.py +0 -194
- taxcalc/tests/test_pufcsv.py +0 -328
- {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/WHEEL +0 -0
- {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/entry_points.txt +0 -0
- {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/licenses/LICENSE +0 -0
- {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/top_level.txt +0 -0
taxcalc/tests/test_compatible_data.py
@@ -1,334 +0,0 @@
-"""
-Tests of the compatible_data fields in the policy_current_law.json file.
-
-In order to tap into the parallelization capabilities of py.test, this module
-leans heavily on py.tests's `parametrization` method. Once you do so, the
-plug-in pytest-xdist is able to run all parametrized functions in parallel
-"""
-# CODING-STYLE CHECKS:
-# pycodestyle test_compatible_data.py
-# pylint --disable=locally-disabled test_compatible_data.py
-
-import copy
-import pytest
-import numpy as np
-from taxcalc.policy import Policy
-from taxcalc.records import Records
-from taxcalc.calculator import Calculator
-
-
-@pytest.fixture(scope='module', name='allparams')
-def fixture_allparams():
-    """
-    Return metadata for current-law policy parameters.
-    """
-    clp = Policy()
-    return clp.metadata()
-
-
-def test_compatible_data_presence(allparams):
-    """
-    Test that every parameter in the policy_current_law.json file
-    has a compatible_data field that is a dictionary.
-    """
-    compatible_data_keys_set = set(['puf', 'cps'])
-
-    # Nested function used only in test_compatible_data_presence
-    def valid_compatible_data(compatible_data):
-        """
-        Return True if compatible_data is a valid dictionary;
-        otherwise return False
-        """
-        if not isinstance(compatible_data, dict):
-            return False
-        if set(compatible_data.keys()) != compatible_data_keys_set:
-            return False
-        for key in compatible_data:
-            boolean = (compatible_data[key] is True or
-                       compatible_data[key] is False)
-            if not boolean:
-                return False
-        return True
-
-    # Main logic of test_compatible_data_presence function
-    problem_pnames = []
-    for pname in allparams:
-        if 'compatible_data' in allparams[pname]:
-            compatible_data = allparams[pname]['compatible_data']
-        else:
-            compatible_data = None
-        if not valid_compatible_data(compatible_data):
-            problem_pnames.append(pname)
-    if problem_pnames:
-        msg = '{} has no or invalid compatible_data field'
-        for pname in problem_pnames:
-            print(msg.format(pname))
-        assert False, 'ERROR: list of problem_pnames is above'
-
-
-XX_YEAR = 2019
-TEST_YEAR = 2020
-
-
-@pytest.fixture(scope='module', name='reform_xx')
-def fixture_reform_xx():
-    """
-    Fixture for reform dictionary where reform starts before TEST_YEAR.
-
-    The provisions in the baseline reform, designated in _reform_xx,
-    are chosen to activate parameters that are inactive under current law.
-    For example a phaseout rate for a new credit is inactive if the credit's
-    amount is set to zero under current law. In order to activate the phaseout
-    rate, the credit amount should be set above zero. The provisions interact
-    with each other: you may acidentally deactivate one parameter
-    by introducing a provision to activate another. If you find that a pair of
-    parameters are impossible test jointly, add one to the local variable
-    `exempt_from_testing` in `test_compatible_data()` as a last resort.
-    """
-    assert XX_YEAR < TEST_YEAR
-
-    # Set baseline to activate parameters that are inactive under current law.
-    _reform_xx = {
-        XX_YEAR: {
-            'FST_AGI_trt': 0.5,
-            'CTC_new_rt': 0.5,
-            'CTC_new_c': 5000,
-            'CTC_new_prt': 0.1,
-            'CTC_new_refund_limited': True,
-            'CTC_new_refund_limit_payroll_rt': 1,
-            'ACTC_ChildNum': 1,
-            'UBI_u18': 1000,
-            'UBI_1820': 1000,
-            'UBI_21': 1000,
-            'II_credit_prt': 0.1,
-            'II_credit': [100, 100, 100, 100, 100],
-            'CG_brk3': [1000000, 1000000, 1000000, 1000000, 1000000],
-            'ALD_Dependents_Child_c': 1000,
-            'II_credit_nr': [1000, 1000, 1000, 1000, 1000],
-            'II_credit_nr_prt': 0.1,
-            'AMT_CG_brk3': [500000, 500000, 500000, 500000, 500000],
-            'AGI_surtax_thd': [1000000, 1000000, 1000000, 1000000, 1000000],
-            'AGI_surtax_trt': 0.5,
-            'II_brk7': [1000000, 1000000, 1000000, 1000000, 1000000],
-            'II_em': 1000,
-            'ID_Casualty_hc': 0.1,
-            'ID_Miscellaneous_hc': 0.1,
-            'ID_prt': 0.03,
-            'ID_crt': 0.8,
-            'CR_Charity_rt': 0.4,
-            'CR_Charity_f': [5000, 5000, 5000, 5000, 5000],
-            'CR_Charity_frt': 0.5,
-            'CR_SchR_hc': 0.5
-        }
-    }
-    return _reform_xx
-
-
-@pytest.fixture(scope='module', name='sorted_param_names')
-def fixture_sorted_param_names(allparams):
-    """
-    Fixture for storing a sorted parameter list
-    """
-    return sorted(list(allparams.keys()))
-
-
-NPARAMS = 219  # hard-code NPARAMS to len(allparams)
-BATCHSIZE = 10
-BATCHES = int(np.floor(NPARAMS / BATCHSIZE)) + 1
-
-
-@pytest.fixture(scope='module', name='allparams_batch',
-                params=list(range(0, BATCHES)))
-def fixture_allparams_batch(request, allparams, sorted_param_names):
-    """
-    Fixture for grouping Tax-Calculator parameters
-
-    Experiments indicated that there is some overhead when you run
-    `test_compatible_data` on each parameter individually. Suppose it takes X
-    amount of time to set up the test data for `test_compatible_data` and Y
-    amount of time to run `test_compatible_data` on each parameter wihtout
-    parallelization. Then, if there is no overhead from parallelization, you
-    would expect it to take Y + (X / NUMBER_WORKERS) to run these tests in
-    parallel. Note that setup data is only created once if you set the
-    fixture scope to 'module'. However, experiments indicated that there was
-    so much overhead that the tests weren't that much faster in parallel than
-    if they were run sequentially.
-
-    I found that running the parameters in batches decreased the amount of
-    overhead. Further, there was an optimal batch size that I found through
-    trial and error. On my local machine, this was something like 10
-    parameters. Others may find a different optimal batch size on their
-    machines. Further, if the number of parameters changes, the optimal
-    batch size could change, too.
-
-    Math for partitioning the parameters:
-
-    Suppose we have N parameters and choose batch size n. Then, we have
-    B batches where B equals floor(N / n) + 1.
-
-    Case 1: N % n = 0
-    Then we have:
-    idx_min = {i * b, i = 0, 1, 2, 3, ..., B - 1} and
-    idx_max = {min((i + 1) * b, N), i = 0, 1, 2, 3, ..., B - 1}
-
-    So, if i equals 0, the batch contains the first b - 1 parameters.
-    Then, if i equals B, then idx_min is n * (B - 1) = N and idx_max is N and
-    thus, the last batch is empty.
-
-    Case 2: N % n = r > 0
-    Then, everything is the same as case 1, except for the final batch.
-    In the final batch, idx_min = b * (B - 1) = b * floor(N / n) < N, and
-    idx_max is N. So, we our final batch size is
-    idx_max - idx_min = N - b * B = r.
-
-    returns: dictionary of size, BATCHSIZE, or for the final batch,
-    either an empty dictionary or dictionary of size NPARAMS mod BATCHSIZE
-    """
-    idx = request.param
-    idx_start = idx * BATCHSIZE
-    idx_end = min((idx + 1) * BATCHSIZE, NPARAMS)
-    pnames = sorted_param_names[idx_start: idx_end]
-    return {pname: allparams[pname] for pname in pnames}
-
-
-@pytest.fixture(scope='module', name='tc_objs',
-                params=[True, False])
-def fixture_tc_objs(request, reform_xx, puf_subsample, cps_subsample):
-    """
-    Fixture for creating Tax-Calculator objects that use the PUF and
-    use the CPS (called only twice: once for PUF and once for CPS)
-    """
-    puftest = request.param
-    p_xx = Policy()
-    p_xx.implement_reform(reform_xx, raise_errors=False)
-    if puftest:
-        rec_xx = Records(data=puf_subsample)
-    else:
-        rec_xx = Records.cps_constructor(data=cps_subsample)
-    c_xx = Calculator(policy=p_xx, records=rec_xx)
-    c_xx.advance_to_year(TEST_YEAR)
-    c_xx.calc_all()
-    return rec_xx, c_xx, puftest
-
-
-@pytest.mark.skip
-@pytest.mark.pre_release
-@pytest.mark.compatible_data
-@pytest.mark.requires_pufcsv
-def test_compatible_data(cps_subsample, puf_subsample,
-                         allparams, reform_xx,
-                         tc_objs, allparams_batch):
-    """
-    Test that the compatible_data attribute in policy_current_law.json
-    is accurate by implementing the min and max values of each parameter
-    as reforms and ensuring that revenue differs from baseline when for
-    at least one of these reforms when using datasets marked compatible
-    and does not differ when using datasets marked as incompatible.
-    """
-    # pylint: disable=too-many-arguments,too-many-positional-arguments
-    # pylint: disable=too-many-statements,too-many-branches,too-many-locals
-
-    # Check NPARAMS value
-    assert NPARAMS == len(allparams)
-
-    # Get taxcalc objects from tc_objs fixture
-    rec_xx, c_xx, puftest = tc_objs
-
-    # These parameters are exempt because they are not active under
-    # current law and activating them would deactivate other parameters,
-    # or if it is difficult to devise a test for them.
-    exempt_from_testing = [
-        'CG_ec', 'CG_reinvest_ec_rt',
-        'II_prt', 'ID_prt', 'ID_crt',
-        'CR_SchR_hc', 'ACTC_ChildNum'
-    ]
-
-    # Loop through the parameters in allparams_batch
-    errmsg = 'ERROR: {} {}\n'
-    errors = ''
-    for pname in allparams_batch:
-        param = allparams_batch[pname]
-        max_listed = param['valid_values']['max']
-        # handle links to other params or self
-        if isinstance(max_listed, str):
-            if isinstance(allparams[max_listed]['value'][0], list):
-                max_val = allparams[max_listed]['value'][0]
-            else:
-                max_val = float(allparams[max_listed]['value'][0])
-        else:
-            if isinstance(param['value'][0], list):
-                max_val = [max_listed] * len(param['value'][0])
-            else:
-                max_val = max_listed
-        min_listed = param['valid_values']['min']
-        if isinstance(min_listed, str):
-            if isinstance(allparams[min_listed]['value'][0], list):
-                min_val = allparams[min_listed]['value'][0]
-            else:
-                min_val = float(allparams[min_listed]['value'][0])
-        else:
-            if isinstance(param['value'][0], list):
-                min_val = [min_listed] * len(param['value'][0])
-            else:
-                min_val = min_listed
-        # create reform dictionaries
-        max_reform = copy.deepcopy(reform_xx)
-        min_reform = copy.deepcopy(reform_xx)
-        max_reform[XX_YEAR][str(pname)] = [max_val]
-        min_reform[XX_YEAR][str(pname)] = [min_val]
-        # assess whether max reform changes results
-        if puftest:
-            rec_yy = Records(data=puf_subsample)
-        else:
-            rec_yy = Records.cps_constructor(data=cps_subsample)
-        p_yy = Policy()
-        p_yy.implement_reform(max_reform, raise_errors=False)
-        c_yy = Calculator(policy=p_yy, records=rec_yy, verbose=False)
-        c_yy.advance_to_year(TEST_YEAR)
-        c_yy.calc_all()
-        if pname.startswith('BEN') and pname.endswith('_repeal'):
-            max_reform_change = (
-                c_yy.weighted_total('benefit_cost_total') -
-                c_xx.weighted_total('benefit_cost_total')
-            )
-        else:
-            max_reform_change = (
-                c_yy.weighted_total('combined') -
-                c_xx.weighted_total('combined')
-            )
-        min_reform_change = 0
-        # assess whether min reform changes results, if max reform did not
-        if max_reform_change == 0:
-            p_yy = Policy()
-            p_yy.implement_reform(min_reform, raise_errors=False)
-            c_yy = Calculator(policy=p_yy, records=rec_xx)
-            c_yy.advance_to_year(TEST_YEAR)
-            c_yy.calc_all()
-            if pname.startswith('BEN') and pname.endswith('_repeal'):
-                min_reform_change = (
-                    c_yy.weighted_total('benefit_cost_total') -
-                    c_xx.weighted_total('benefit_cost_total')
-                )
-            else:
-                min_reform_change = (
-                    c_yy.weighted_total('combined') -
-                    c_xx.weighted_total('combined')
-                )
-            if min_reform_change == 0 and pname not in exempt_from_testing:
-                if puftest:
-                    if param['compatible_data']['puf'] is True:
-                        errors += errmsg.format(pname, 'is not True for puf')
-                else:
-                    if param['compatible_data']['cps'] is True:
-                        errors += errmsg.format(pname, 'is not True for cps')
-        if max_reform_change != 0 or min_reform_change != 0:
-            if puftest:
-                if param['compatible_data']['puf'] is False:
-                    errors += errmsg.format(pname, 'is not False for puf')
-            else:
-                if param['compatible_data']['cps'] is False:
-                    errors += errmsg.format(pname, 'is not False for cps')
-    # test failure if any errors
-    if errors:
-        print(errors)
-        assert False, 'ERROR: compatible_data is invalid; see errors above'
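The fixture_allparams_batch docstring above derives the batch bounds in prose. The short standalone sketch below is not part of taxcalc; batch_bounds is a hypothetical helper written only to spell out the same index arithmetic, using the constants the deleted test hard-coded (NPARAMS = 219, BATCHSIZE = 10):

import math

def batch_bounds(n_params, batch_size):
    """Return (start, end) index pairs, one per batch, over a sorted name list."""
    n_batches = math.floor(n_params / batch_size) + 1  # B = floor(N / n) + 1
    return [(i * batch_size, min((i + 1) * batch_size, n_params))
            for i in range(n_batches)]

# 219 parameters in batches of 10 give 22 batches; the last holds 219 mod 10 = 9 names.
bounds = batch_bounds(219, 10)
assert len(bounds) == 22
assert bounds[0] == (0, 10)
assert bounds[-1] == (210, 219)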
taxcalc/tests/test_puf_var_stats.py
@@ -1,194 +0,0 @@
-"""
-Test generates statistics for puf.csv variables.
-"""
-# CODING-STYLE CHECKS:
-# pycodestyle test_puf_var_stats.py
-# pylint --disable=locally-disabled test_puf_var_stats.py
-
-import os
-import json
-import copy
-import numpy as np
-import pandas as pd
-import pytest
-from taxcalc.policy import Policy
-from taxcalc.records import Records
-from taxcalc.calculator import Calculator
-
-
-def create_base_table(test_path):
-    """
-    Create and return base table.
-    """
-    # specify calculated variable names and descriptions
-    calc_dict = {'eitc': 'Federal EITC',
-                 'iitax': 'Federal income tax liability',
-                 'payrolltax': 'Payroll taxes (ee+er) for OASDI+HI',
-                 'c00100': 'Federal AGI',
-                 'c02500': 'OASDI benefits in AGI',
-                 'c04600': 'Post-phase-out personal exemption',
-                 'c21040': 'Itemized deduction that is phased out',
-                 'c04470': 'Post-phase-out itemized deduction',
-                 'c04800': 'Federal regular taxable income',
-                 'c05200': 'Regular tax on taxable income',
-                 'c07220': 'Child tax credit (adjusted)',
-                 'c11070': 'Extra child tax credit (refunded)',
-                 'c07180': 'Child care credit',
-                 'c09600': 'Federal AMT liability'}
-    # specify read variable names and descriptions
-    unused_var_set = set(['DSI', 'EIC',
-                          'h_seq', 'a_lineno', 'ffpos', 'fips', 'agi_bin',
-                          'FLPDYR', 'FLPDMO', 'f2441', 'f3800', 'f6251',
-                          'f8582', 'f8606', 'f8829', 'f8910', 'f8936', 'n20',
-                          'n24', 'n25', 'n30', 'PREP', 'SCHB', 'SCHCF', 'SCHE',
-                          'TFORM', 'IE', 'TXST', 'XFPT', 'XFST', 'XOCAH',
-                          'XOCAWH', 'XOODEP', 'XOPAR', 'XTOT', 'MARS', 'MIDR',
-                          'RECID', 'gender', 'wage_head', 'wage_spouse',
-                          'earnsplit', 'agedp1', 'agedp2', 'agedp3',
-                          's006', 's008', 's009', 'WSAMP', 'TXRT',
-                          'matched_weight', 'e00200p', 'e00200s',
-                          'e00900p', 'e00900s', 'e02100p', 'e02100s',
-                          'age_head', 'age_spouse',
-                          'nu18', 'n1820', 'n21',
-                          'ssi_ben', 'snap_ben', 'other_ben',
-                          'mcare_ben', 'mcaid_ben', 'vet_ben',
-                          'housing_ben', 'tanf_ben', 'wic_ben',
-                          'blind_head', 'blind_spouse',
-                          'PT_SSTB_income',
-                          'PT_binc_w2_wages',
-                          'PT_ubia_property'])
-    records_varinfo = Records(data=None)
-    read_vars = list(records_varinfo.USABLE_READ_VARS - unused_var_set)
-    # get read variable information from JSON file
-    rec_vars_path = os.path.join(test_path, '..', 'records_variables.json')
-    with open(rec_vars_path, 'r', encoding='utf-8') as rvfile:
-        read_var_dict = json.load(rvfile)
-    # create table_dict with sorted read vars followed by sorted calc vars
-    table_dict = {}
-    for var in sorted(read_vars):
-        if "taxdata_puf" in read_var_dict['read'][var]['availability']:
-            table_dict[var] = read_var_dict['read'][var]['desc']
-        else:
-            pass
-    sorted_calc_vars = sorted(calc_dict.keys())
-    for var in sorted_calc_vars:
-        table_dict[var] = calc_dict[var]
-    # construct DataFrame table from table_dict
-    table = pd.DataFrame.from_dict(table_dict, orient='index')
-    table.columns = ['description']
-    return table
-
-
-def calculate_corr_stats(calc, table):
-    """
-    Calculate correlation coefficient matrix.
-    """
-    errmsg = ''
-    for varname1 in table.index:
-        var1 = calc.array(varname1)
-        var1_cc = []
-        for varname2 in table.index:
-            var2 = calc.array(varname2)
-            try:
-                cor = np.corrcoef(var1, var2)[0][1]
-            except FloatingPointError:
-                msg = f'corr-coef error for {varname1} and {varname2}\n'
-                errmsg += msg
-                cor = 9.99  # because could not compute it
-            var1_cc.append(cor)
-        table[varname1] = var1_cc
-    if errmsg:
-        raise ValueError('\n' + errmsg)
-
-
-def calculate_mean_stats(calc, table, year):
-    """
-    Calculate weighted means for year.
-    """
-    total_weight = calc.total_weight()
-    means = []
-    for varname in table.index:
-        wmean = calc.weighted_total(varname) / total_weight
-        means.append(wmean)
-    table[str(year)] = means
-
-
-def differences(new_filename, old_filename, stat_kind):
-    """
-    Return message string if differences detected by np.allclose();
-    otherwise return empty string.
-    """
-    new_df = pd.read_csv(new_filename)
-    old_df = pd.read_csv(old_filename)
-    diffs = False
-    if list(new_df.columns.values) == list(old_df.columns.values):
-        for col in new_df.columns[1:]:
-            if col == 'description':
-                continue  # skip description column
-            if not np.allclose(new_df[col], old_df[col]):
-                diffs = True
-    else:
-        diffs = True
-    if diffs:
-        new_name = os.path.basename(new_filename)
-        old_name = os.path.basename(old_filename)
-        msg = f'{stat_kind} RESULTS DIFFER:\n'
-        msg += '-------------------------------------------------'
-        msg += '-------------\n'
-        msg += f'--- NEW RESULTS IN {new_name} FILE ---\n'
-        msg += f'--- if new OK, copy {new_name} to\n'
-        msg += f'--- {old_name} \n'
-        msg += '--- and rerun test. '
-        msg += '-------------------------------------------------'
-        msg += '-------------\n'
-    else:
-        msg = ''
-        os.remove(new_filename)
-    return msg
-
-
-MEAN_FILENAME = 'puf_var_wght_means_by_year.csv'
-CORR_FILENAME = 'puf_var_correl_coeffs_2016.csv'
-
-
-@pytest.mark.requires_pufcsv
-def test_puf_var_stats(tests_path, puf_fullsample):
-    """
-    Main logic of test.
-    """
-    # create a baseline Policy object containing 2017_law.json parameters
-    pre_tcja_jrf = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
-    pre_tcja = Policy.read_json_reform(pre_tcja_jrf)
-    baseline_policy = Policy()
-    baseline_policy.implement_reform(pre_tcja)
-    # create a Calculator object using baseline_policy and full puf.csv sample
-    rec = Records(data=puf_fullsample)
-    calc = Calculator(policy=baseline_policy, records=rec, verbose=False)
-    # create base tables
-    table_mean = create_base_table(tests_path)
-    table_corr = copy.deepcopy(table_mean)
-    del table_corr['description']
-    # add statistics to tables
-    year_headers = ['description']
-    for year in range(Policy.JSON_START_YEAR, 2024 + 1):
-        assert year == calc.current_year
-        year_headers.append(str(year))
-        calc.calc_all()
-        calculate_mean_stats(calc, table_mean, year)
-        if year == 2016:
-            calculate_corr_stats(calc, table_corr)
-        if year < 2034:
-            calc.increment_year()
-    # write tables to new CSV files
-    mean_path = os.path.join(tests_path, MEAN_FILENAME + '-new')
-    table_mean.sort_index(inplace=True)
-    table_mean.to_csv(mean_path, header=year_headers, float_format='%8.0f')
-    corr_path = os.path.join(tests_path, CORR_FILENAME + '-new')
-    table_corr.sort_index(inplace=True)
-    table_corr.to_csv(corr_path, float_format='%8.2f',
-                      columns=table_corr.index)
-    # compare new and old CSV files for differences
-    mean_msg = differences(mean_path, mean_path[:-4], 'MEAN')
-    corr_msg = differences(corr_path, corr_path[:-4], 'CORR')
-    if mean_msg or corr_msg:
-        raise ValueError(mean_msg + corr_msg)
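As a reading aid for the statistics the deleted test tabulates, here is a minimal standalone sketch using synthetic arrays only (no taxcalc Calculator, no puf.csv data; the variable names are stand-ins): the per-year weighted means computed as weighted_total(var) / total_weight in calculate_mean_stats, and the correlation coefficients taken from np.corrcoef in calculate_corr_stats.

import numpy as np

weights = np.array([1.5, 2.0, 0.5, 1.0])    # stand-in for the s006 sample weights
agi = np.array([50e3, 120e3, 30e3, 80e3])   # stand-in for a variable such as c00100
iitax = np.array([4e3, 22e3, 1e3, 9e3])     # stand-in for iitax

# weighted mean: sum(weight * value) / sum(weight)
weighted_mean_agi = np.dot(weights, agi) / weights.sum()

# unweighted Pearson correlation between two variables, as np.corrcoef returns it
corr_agi_iitax = np.corrcoef(agi, iitax)[0][1]

print(f"weighted mean AGI: {weighted_mean_agi:,.0f}")
print(f"corr(agi, iitax):  {corr_agi_iitax:.2f}")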