taxcalc 5.2.0__py3-none-any.whl → 6.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- taxcalc/__init__.py +3 -3
- taxcalc/calcfunctions.py +2 -2
- taxcalc/calculator.py +4 -4
- taxcalc/cli/tc.py +16 -19
- taxcalc/data.py +2 -3
- taxcalc/decorators.py +9 -8
- taxcalc/growfactors.py +2 -1
- taxcalc/policy.py +6 -23
- taxcalc/policy_current_law.json +31 -631
- taxcalc/records.py +78 -82
- taxcalc/records_variables.json +106 -106
- taxcalc/reforms/ARPA.out.csv +9 -9
- taxcalc/taxcalcio.py +101 -77
- taxcalc/tests/conftest.py +20 -15
- taxcalc/tests/puf_var_correl_coeffs_2016.csv +24 -24
- taxcalc/tests/puf_var_wght_means_by_year.csv +11 -11
- taxcalc/tests/pufcsv_agg_expect.csv +20 -20
- taxcalc/tests/pufcsv_mtr_expect.txt +21 -21
- taxcalc/tests/reforms.json +3 -1
- taxcalc/tests/reforms_expect.csv +54 -54
- taxcalc/tests/test_4package.py +8 -9
- taxcalc/tests/test_calculator.py +55 -18
- taxcalc/tests/test_consumption.py +2 -2
- taxcalc/tests/test_cpscsv.py +2 -24
- taxcalc/tests/test_data.py +11 -3
- taxcalc/tests/test_decorators.py +57 -52
- taxcalc/tests/test_growdiff.py +2 -2
- taxcalc/tests/test_parameters.py +101 -53
- taxcalc/tests/test_policy.py +154 -154
- taxcalc/tests/test_records.py +144 -9
- taxcalc/tests/test_reforms.py +104 -104
- taxcalc/tests/test_taxcalcio.py +13 -62
- taxcalc/utils.py +3 -3
- {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/METADATA +3 -6
- {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/RECORD +39 -46
- taxcalc/puf_ratios.csv +0 -26
- taxcalc/puf_weights.csv.gz +0 -0
- taxcalc/reforms/clp.out.csv +0 -10
- taxcalc/tests/test_compare.py +0 -330
- taxcalc/tests/test_compatible_data.py +0 -334
- taxcalc/tests/test_puf_var_stats.py +0 -194
- taxcalc/tests/test_pufcsv.py +0 -328
- {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/WHEEL +0 -0
- {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/entry_points.txt +0 -0
- {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/licenses/LICENSE +0 -0
- {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/top_level.txt +0 -0
taxcalc/tests/test_pufcsv.py
DELETED
@@ -1,328 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Tests of Tax-Calculator using puf.csv input.
|
3
|
-
|
4
|
-
Note that the puf.csv file that is required to run this program has
|
5
|
-
been constructed by the Tax-Calculator development team by merging
|
6
|
-
information from the most recent publicly available IRS SOI PUF file
|
7
|
-
and from the Census CPS file for the corresponding year. If you have
|
8
|
-
acquired from IRS the most recent SOI PUF file and want to execute
|
9
|
-
this program, contact the Tax-Calculator development team to discuss
|
10
|
-
your options.
|
11
|
-
"""
|
12
|
-
# CODING-STYLE CHECKS:
|
13
|
-
# pycodestyle test_pufcsv.py
|
14
|
-
# pylint --disable=locally-disabled test_pufcsv.py
|
15
|
-
|
16
|
-
import os
|
17
|
-
import json
|
18
|
-
import pytest
|
19
|
-
import numpy as np
|
20
|
-
import pandas as pd
|
21
|
-
from taxcalc.policy import Policy
|
22
|
-
from taxcalc.records import Records
|
23
|
-
from taxcalc.calculator import Calculator
|
24
|
-
|
25
|
-
|
26
|
-
# Calendar year that test_agg advances its Calculator objects to, and the
# number of years passed to Calculator.diagnostic_table in that test.
START_YEAR = 2017
NUM_YEARS = 19
|
28
|
-
|
29
|
-
|
30
|
-
@pytest.mark.pufcsv_agg
@pytest.mark.requires_pufcsv
def test_agg(tests_path, puf_fullsample):
    """
    Check no-reform aggregate results computed from puf.csv.

    First the diagnostic table computed from the full sample is compared
    with the expected table stored in pufcsv_agg_expect.csv; if any column
    differs, the actual table is written to pufcsv_agg_actual.csv and a
    ValueError is raised.  Then the combined tax liability computed from
    a fixed-seed sub-sample of puf.csv is required to be within a relative
    tolerance of the full-sample liability in every year.
    """
    # pylint: disable=too-many-locals,too-many-statements
    liability_row = "Combined Liability ($b)"
    num_years = NUM_YEARS
    current_law = Policy()

    # ---- full-sample results versus expected results ----
    full_calc = Calculator(policy=current_law,
                           records=Records(data=puf_fullsample))
    full_calc.advance_to_year(START_YEAR)
    first_year = full_calc.current_year
    # actual table has int column labels
    actual = full_calc.diagnostic_table(num_years).round(1)
    full_liability = actual.loc[liability_row]
    expect_path = os.path.join(tests_path, 'pufcsv_agg_expect.csv')
    # expected table has str column labels
    expected = pd.read_csv(expect_path, index_col=False)
    expected = expected.drop(columns='Unnamed: 0')
    assert len(actual.columns.values) == len(expected.columns.values)
    # evaluate every column (no short-circuit) before deciding
    column_differs = [
        not np.allclose(actual[col].values, expected[str(col)].values)
        for col in actual.columns.values
    ]
    if any(column_differs):
        # strip the trailing 'expect.csv' (10 characters) from the path
        actual_path = f'{expect_path[:-10]}actual.csv'
        actual.to_csv(actual_path, float_format='%.1f')
        raise ValueError(''.join((
            'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n',
            '-------------------------------------------------\n',
            '--- NEW RESULTS IN pufcsv_agg_actual.csv FILE ---\n',
            '--- if new OK, copy pufcsv_agg_actual.csv to ---\n',
            '--- pufcsv_agg_expect.csv ---\n',
            '--- and rerun test. ---\n',
            '--- (both are in taxcalc/tests) ---\n',
            '-------------------------------------------------\n',
        )))

    # ---- unweighted sub-sample results versus full-sample results ----
    seed = 2222  # fixed so that the sub-sample is always the same
    fraction = 0.05  # sub-sample fraction
    sampled = puf_fullsample.sample(frac=fraction, random_state=seed)
    sub_calc = Calculator(policy=current_law, records=Records(data=sampled))
    sub_calc.advance_to_year(START_YEAR)
    sub_liability = sub_calc.diagnostic_table(num_years).loc[liability_row]
    problems = []
    for year in range(first_year, first_year + num_years):
        max_reldiff = 0.031  # maximum allowed relative difference
        sub_tax = sub_liability[year]
        full_tax = full_liability[year]
        if np.allclose(sub_tax, full_tax, atol=0.0, rtol=max_reldiff):
            continue
        reldiff = (sub_tax / full_tax) - 1.
        problems.append(
            '\nPUFCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'.format(year)
        )
        problems.append(
            '\n when subfrac={:.3f}, rtol={:.4f}, seed={}'.format(
                fraction, max_reldiff, seed
            )
        )
        problems.append(
            '\n with sub={:.3f}, full={:.3f}, rdiff={:.4f}'.format(
                sub_tax, full_tax, reldiff
            )
        )
    report = ''.join(problems)
    if report:
        raise ValueError(report)
|
98
|
-
|
99
|
-
|
100
|
-
# Settings used by test_mtr: the tax year the Policy object is set to and
# the direction of the finite difference passed to Calculator.mtr.
MTR_TAX_YEAR = 2013
MTR_NEG_DIFF = False  # set True to subtract (rather than add) small amount
# specify payrolltax mtr histogram bin boundaries (or edges):
PTAX_MTR_BIN_EDGES = [0.0, 0.02, 0.04, 0.06, 0.08,
                      0.10, 0.12, 0.14, 0.16, 0.18, 1.0]
#        the bin boundaries above are arbitrary, so users
#        may want to experiment with alternative boundaries
# specify incometax mtr histogram bin boundaries (or edges):
# (test_mtr asserts this list has the same length as PTAX_MTR_BIN_EDGES)
ITAX_MTR_BIN_EDGES = [-1.0, -0.30, -0.20, -0.10, 0.0,
                      0.10, 0.20, 0.30, 0.40, 0.50, 1.0]
#        the bin boundaries above are arbitrary, so users
#        may want to experiment with alternative boundaries
|
112
|
-
|
113
|
-
|
114
|
-
def mtr_bin_counts(mtr_data, bin_edges, recid):
    """
    Compute mtr histogram bin counts and return results as a string.

    Parameters
    ----------
    mtr_data: numpy array of marginal tax rates; values are rounded to
        four decimals before being binned
    bin_edges: sequence of histogram bin boundaries passed to np.histogram
    recid: record identifiers indexed by the same positions as mtr_data;
        used only to label values that fall outside the bins

    Returns
    -------
    string containing one line with the sum of the bin counts followed by
    each bin count; when the sum is less than the number of mtr values,
    a warning line follows plus, for values below the lowest edge and/or
    above the highest edge, the extreme value and one line per offending
    record
    """
    counts, _ = np.histogram(mtr_data.round(decimals=4), bins=bin_edges)
    total = np.sum(counts)
    parts = [f'{total} :']
    parts.extend(f' {count:6d}' for count in counts)
    parts.append('\n')
    if total < mtr_data.size:
        parts.append('WARNING: sum of bin counts is too low\n')
        mtr_min = mtr_data.min()
        mtr_max = mtr_data.max()
        lowest_edge = min(bin_edges)
        highest_edge = max(bin_edges)
        if mtr_min < lowest_edge:
            parts.append(f' min(mtr)={mtr_min:.2f}\n')
            # visit only the offending positions, in ascending order
            for idx in np.flatnonzero(mtr_data < lowest_edge):
                parts.append(
                    f' mtr={mtr_data[idx]:.2f} '
                    f'for recid={recid[idx]}\n'
                )
        if mtr_max > highest_edge:
            parts.append(f' max(mtr)={mtr_max:.2f}\n')
            for idx in np.flatnonzero(mtr_data > highest_edge):
                parts.append(
                    f' mtr={mtr_data[idx]:.2f} '
                    f'for recid={recid[idx]}\n'
                )
    return ''.join(parts)
|
148
|
-
|
149
|
-
|
150
|
-
def nonsmall_diffs(linelist1, linelist2, small=0.0):
    """
    Return True if line lists differ significantly; otherwise return False.
    Significant numerical difference means one or more numbers differ (between
    linelist1 and linelist2) by more than the specified small amount.

    Lines are compared pairwise.  Lines that are not identical are split
    into whitespace-separated tokens after commas are removed; a different
    number of tokens on the two lines is a significant difference, as is
    a pair of tokens where only one is numeric, a pair of non-numeric
    tokens that are not equal, or a pair of numeric tokens whose absolute
    difference exceeds small plus 1e-6.

    Parameters
    ----------
    linelist1, linelist2: lists of strings
    small: float in the [0.0, 1.0] range

    Raises
    ------
    AssertionError: if either argument is not a list or, when the lists
        have the same length, if small is outside the [0.0, 1.0] range
    """
    # embedded function used only in nonsmall_diffs function
    def isfloat(value):
        """
        Return True if value can be cast to float; otherwise return False.
        """
        try:
            float(value)
        except ValueError:
            return False
        return True

    # begin nonsmall_diffs logic
    assert isinstance(linelist1, list)
    assert isinstance(linelist2, list)
    if len(linelist1) != len(linelist2):
        return True
    assert 0.0 <= small <= 1.0
    epsilon = 1e-6
    smallamt = small + epsilon
    for line1, line2 in zip(linelist1, linelist2):
        if line1 == line2:
            continue
        tokens1 = line1.replace(',', '').split()
        tokens2 = line2.replace(',', '').split()
        # zip stops at the shorter list, so extra trailing tokens on one
        # line would otherwise go unnoticed
        if len(tokens1) != len(tokens2):
            return True
        for tok1, tok2 in zip(tokens1, tokens2):
            tok1_isfloat = isfloat(tok1)
            tok2_isfloat = isfloat(tok2)
            if tok1_isfloat != tok2_isfloat:
                return True  # number on one line, text on the other
            if tok1_isfloat:
                if abs(float(tok1) - float(tok2)) <= smallamt:
                    continue
                return True
            if tok1 == tok2:
                continue
            return True
    return False
|
192
|
-
|
193
|
-
|
194
|
-
@pytest.mark.requires_pufcsv
def test_mtr(tests_path, puf_path):
    """
    Test Tax-Calculator marginal tax rates with no policy reform using puf.csv

    Compute histograms for each marginal tax rate income type using
    sample input from the puf.csv file and writing output to a string,
    which is then compared for differences with the expected results
    stored in the pufcsv_mtr_expect.txt file.  If the results differ,
    the actual results are written to pufcsv_mtr_actual.txt and a
    ValueError is raised.
    """
    # pylint: disable=too-many-locals,too-many-statements
    assert len(PTAX_MTR_BIN_EDGES) == len(ITAX_MTR_BIN_EDGES)
    # construct actual results string, res
    res = ''
    if MTR_NEG_DIFF:
        res += 'MTR computed using NEGATIVE finite_diff '
    else:
        res += 'MTR computed using POSITIVE finite_diff '
    res += f'for tax year {MTR_TAX_YEAR}\n'
    # create a Policy object (clp) containing current-law policy parameters
    clp = Policy()
    clp.set_year(MTR_TAX_YEAR)
    # create a Records object (puf) containing puf.csv input records
    puf = Records(data=puf_path)
    recid = puf.RECID  # pylint: disable=no-member
    # create a Calculator object using clp policy and puf records
    calc = Calculator(policy=clp, records=puf)
    res += f'Total number of data records = {puf.array_length}\n'
    res += 'PTAX mtr histogram bin edges:\n'
    res += f' {PTAX_MTR_BIN_EDGES}\n'
    res += 'ITAX mtr histogram bin edges:\n'
    res += f' {ITAX_MTR_BIN_EDGES}\n'
    variable_header = 'PTAX and ITAX mtr histogram bin counts for'
    # compute marginal tax rate (mtr) histograms for each mtr variable
    for var_str in Calculator.MTR_VALID_VARIABLES:
        zero_out = var_str == 'e01400'
        (mtr_ptax, mtr_itax, _) = calc.mtr(variable_str=var_str,
                                           negative_finite_diff=MTR_NEG_DIFF,
                                           zero_out_calculated_vars=zero_out,
                                           wrt_full_compensation=False)
        if zero_out:
            # check that calculated variables are consistent
            assert np.allclose((calc.array('iitax') +
                                calc.array('payrolltax')),
                               calc.array('combined'))
            assert np.allclose(calc.array('ptax_was'),
                               calc.array('payrolltax'))
            assert np.allclose(calc.array('c21060') - calc.array('c21040'),
                               calc.array('c04470'))
            assert np.allclose(calc.array('taxbc') + calc.array('c09600'),
                               calc.array('c05800'))
            assert np.allclose((calc.array('c05800') +
                                calc.array('othertaxes') -
                                calc.array('c07100')),
                               calc.array('c09200'))
            assert np.allclose(calc.array('c09200') - calc.array('refund'),
                               calc.array('iitax'))
        # record ids passed to mtr_bin_counts must be indexed by the same
        # positions as the mtr arrays
        var_recid = recid
        if var_str == 'e00200s':
            # only MARS==2 filing units have valid MTR values, so filter
            # the record ids with the same mask as the mtr arrays; without
            # this, out-of-range warnings would name the wrong records
            # NOTE(review): assumes puf.RECID is an array in the same
            # record order as calc.array('MARS') -- confirm
            married = calc.array('MARS') == 2
            mtr_ptax = mtr_ptax[married]
            mtr_itax = mtr_itax[married]
            var_recid = recid[married]
        res += f'{variable_header} {var_str}:\n'
        res += mtr_bin_counts(mtr_ptax, PTAX_MTR_BIN_EDGES, var_recid)
        res += mtr_bin_counts(mtr_itax, ITAX_MTR_BIN_EDGES, var_recid)
    # check for differences between actual and expected results
    mtrres_path = os.path.join(tests_path, 'pufcsv_mtr_expect.txt')
    with open(mtrres_path, 'r', encoding='utf-8') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    if nonsmall_diffs(res.splitlines(True), expected_results.splitlines(True)):
        # strip the trailing 'expect.txt' (10 characters) from the path
        new_filename = f'{mtrres_path[:-10]}actual.txt'
        with open(new_filename, 'w', encoding='utf-8') as new_file:
            new_file.write(res)
        msg = 'PUFCSV MTR RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN pufcsv_mtr_actual.txt FILE ---\n'
        msg += '--- if new OK, copy pufcsv_mtr_actual.txt to ---\n'
        msg += '--- pufcsv_mtr_expect.txt ---\n'
        msg += '--- and rerun test. ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
|
274
|
-
|
275
|
-
|
276
|
-
@pytest.mark.requires_pufcsv
def test_credit_reforms(puf_subsample):
    """
    Compare weighted income tax totals from a puf.csv subsample under
    current law, under an II_credit reform, and under an II_credit_nr
    reform, and check the ordering of the three totals.
    """
    year = 2017
    credit_amounts = [1000, 1000, 1000, 1000, 1000]
    records = Records(data=puf_subsample)

    # current law
    policy = Policy()
    baseline_calc = Calculator(policy=policy, records=records)
    baseline_calc.advance_to_year(year)
    baseline_calc.calc_all()
    baseline_itax = baseline_calc.weighted_total('iitax')

    # personal refundable credit, applied to the same Policy object
    policy.implement_reform({'II_credit': {year: list(credit_amounts)}})
    refundable_calc = Calculator(policy=policy, records=records)
    refundable_calc.advance_to_year(year)
    refundable_calc.calc_all()
    refundable_itax = refundable_calc.weighted_total('iitax')

    # personal nonrefundable credit, applied to a fresh Policy object
    nonrefundable_reform = {'II_credit_nr': {year: list(credit_amounts)}}
    policy = Policy()
    policy.implement_reform(nonrefundable_reform)
    nonrefundable_calc = Calculator(policy=policy, records=records)
    nonrefundable_calc.advance_to_year(year)
    nonrefundable_calc.calc_all()
    nonrefundable_itax = nonrefundable_calc.weighted_total('iitax')

    # check income tax revenues generated by the three Calculator objects
    # refundable credits lower revenues
    assert refundable_itax < baseline_itax
    # nonrefundable credits lower revenues less than refundable credits do
    assert nonrefundable_itax > refundable_itax
    # nonrefundable credits still lower revenues somewhat
    assert nonrefundable_itax < baseline_itax
|
308
|
-
|
309
|
-
|
310
|
-
@pytest.mark.requires_pufcsv
def test_puf_availability(tests_path, puf_path):
    """
    Check that the column names in the puf.csv file are exactly the 'read'
    variables in records_variables.json whose 'availability' entry
    contains 'taxdata_puf'.
    """
    # column names present in the puf.csv file
    puf_columns = set(pd.read_csv(puf_path).columns)
    # 'read' variables flagged as available in puf.csv
    json_path = os.path.join(tests_path, '..', 'records_variables.json')
    with open(json_path, 'r', encoding='utf-8') as json_file:
        read_variables = json.load(json_file)['read']
    flagged = {
        name
        for name, info in read_variables.items()
        if 'taxdata_puf' in info.get('availability', '')
    }
    # neither set may contain a name that the other lacks
    assert (puf_columns - flagged) == set()
    assert (flagged - puf_columns) == set()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|