taxcalc 5.2.0__py3-none-any.whl → 6.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. taxcalc/__init__.py +3 -3
  2. taxcalc/calcfunctions.py +2 -2
  3. taxcalc/calculator.py +4 -4
  4. taxcalc/cli/tc.py +16 -19
  5. taxcalc/data.py +2 -3
  6. taxcalc/decorators.py +9 -8
  7. taxcalc/growfactors.py +2 -1
  8. taxcalc/policy.py +6 -23
  9. taxcalc/policy_current_law.json +31 -631
  10. taxcalc/records.py +78 -82
  11. taxcalc/records_variables.json +106 -106
  12. taxcalc/reforms/ARPA.out.csv +9 -9
  13. taxcalc/taxcalcio.py +101 -77
  14. taxcalc/tests/conftest.py +20 -15
  15. taxcalc/tests/puf_var_correl_coeffs_2016.csv +24 -24
  16. taxcalc/tests/puf_var_wght_means_by_year.csv +11 -11
  17. taxcalc/tests/pufcsv_agg_expect.csv +20 -20
  18. taxcalc/tests/pufcsv_mtr_expect.txt +21 -21
  19. taxcalc/tests/reforms.json +3 -1
  20. taxcalc/tests/reforms_expect.csv +54 -54
  21. taxcalc/tests/test_4package.py +8 -9
  22. taxcalc/tests/test_calculator.py +55 -18
  23. taxcalc/tests/test_consumption.py +2 -2
  24. taxcalc/tests/test_cpscsv.py +2 -24
  25. taxcalc/tests/test_data.py +11 -3
  26. taxcalc/tests/test_decorators.py +57 -52
  27. taxcalc/tests/test_growdiff.py +2 -2
  28. taxcalc/tests/test_parameters.py +101 -53
  29. taxcalc/tests/test_policy.py +154 -154
  30. taxcalc/tests/test_records.py +144 -9
  31. taxcalc/tests/test_reforms.py +104 -104
  32. taxcalc/tests/test_taxcalcio.py +13 -62
  33. taxcalc/utils.py +3 -3
  34. {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/METADATA +3 -6
  35. {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/RECORD +39 -46
  36. taxcalc/puf_ratios.csv +0 -26
  37. taxcalc/puf_weights.csv.gz +0 -0
  38. taxcalc/reforms/clp.out.csv +0 -10
  39. taxcalc/tests/test_compare.py +0 -330
  40. taxcalc/tests/test_compatible_data.py +0 -334
  41. taxcalc/tests/test_puf_var_stats.py +0 -194
  42. taxcalc/tests/test_pufcsv.py +0 -328
  43. {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/WHEEL +0 -0
  44. {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/entry_points.txt +0 -0
  45. {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/licenses/LICENSE +0 -0
  46. {taxcalc-5.2.0.dist-info → taxcalc-6.0.0.dist-info}/top_level.txt +0 -0
@@ -1,334 +0,0 @@
1
- """
2
- Tests of the compatible_data fields in the policy_current_law.json file.
3
-
4
- In order to tap into the parallelization capabilities of py.test, this module
5
- leans heavily on py.tests's `parametrization` method. Once you do so, the
6
- plug-in pytest-xdist is able to run all parametrized functions in parallel
7
- """
8
- # CODING-STYLE CHECKS:
9
- # pycodestyle test_compatible_data.py
10
- # pylint --disable=locally-disabled test_compatible_data.py
11
-
12
- import copy
13
- import pytest
14
- import numpy as np
15
- from taxcalc.policy import Policy
16
- from taxcalc.records import Records
17
- from taxcalc.calculator import Calculator
18
-
19
-
20
- @pytest.fixture(scope='module', name='allparams')
21
- def fixture_allparams():
22
- """
23
- Return metadata for current-law policy parameters.
24
- """
25
- clp = Policy()
26
- return clp.metadata()
27
-
28
-
29
- def test_compatible_data_presence(allparams):
30
- """
31
- Test that every parameter in the policy_current_law.json file
32
- has a compatible_data field that is a dictionary.
33
- """
34
- compatible_data_keys_set = set(['puf', 'cps'])
35
-
36
- # Nested function used only in test_compatible_data_presence
37
- def valid_compatible_data(compatible_data):
38
- """
39
- Return True if compatible_data is a valid dictionary;
40
- otherwise return False
41
- """
42
- if not isinstance(compatible_data, dict):
43
- return False
44
- if set(compatible_data.keys()) != compatible_data_keys_set:
45
- return False
46
- for key in compatible_data:
47
- boolean = (compatible_data[key] is True or
48
- compatible_data[key] is False)
49
- if not boolean:
50
- return False
51
- return True
52
-
53
- # Main logic of test_compatible_data_presence function
54
- problem_pnames = []
55
- for pname in allparams:
56
- if 'compatible_data' in allparams[pname]:
57
- compatible_data = allparams[pname]['compatible_data']
58
- else:
59
- compatible_data = None
60
- if not valid_compatible_data(compatible_data):
61
- problem_pnames.append(pname)
62
- if problem_pnames:
63
- msg = '{} has no or invalid compatible_data field'
64
- for pname in problem_pnames:
65
- print(msg.format(pname))
66
- assert False, 'ERROR: list of problem_pnames is above'
67
-
68
-
69
- XX_YEAR = 2019
70
- TEST_YEAR = 2020
71
-
72
-
73
- @pytest.fixture(scope='module', name='reform_xx')
74
- def fixture_reform_xx():
75
- """
76
- Fixture for reform dictionary where reform starts before TEST_YEAR.
77
-
78
- The provisions in the baseline reform, designated in _reform_xx,
79
- are chosen to activate parameters that are inactive under current law.
80
- For example a phaseout rate for a new credit is inactive if the credit's
81
- amount is set to zero under current law. In order to activate the phaseout
82
- rate, the credit amount should be set above zero. The provisions interact
83
- with each other: you may accidentally deactivate one parameter
84
- by introducing a provision to activate another. If you find that a pair of
85
- parameters are impossible to test jointly, add one to the local variable
86
- `exempt_from_testing` in `test_compatible_data()` as a last resort.
87
- """
88
- assert XX_YEAR < TEST_YEAR
89
-
90
- # Set baseline to activate parameters that are inactive under current law.
91
- _reform_xx = {
92
- XX_YEAR: {
93
- 'FST_AGI_trt': 0.5,
94
- 'CTC_new_rt': 0.5,
95
- 'CTC_new_c': 5000,
96
- 'CTC_new_prt': 0.1,
97
- 'CTC_new_refund_limited': True,
98
- 'CTC_new_refund_limit_payroll_rt': 1,
99
- 'ACTC_ChildNum': 1,
100
- 'UBI_u18': 1000,
101
- 'UBI_1820': 1000,
102
- 'UBI_21': 1000,
103
- 'II_credit_prt': 0.1,
104
- 'II_credit': [100, 100, 100, 100, 100],
105
- 'CG_brk3': [1000000, 1000000, 1000000, 1000000, 1000000],
106
- 'ALD_Dependents_Child_c': 1000,
107
- 'II_credit_nr': [1000, 1000, 1000, 1000, 1000],
108
- 'II_credit_nr_prt': 0.1,
109
- 'AMT_CG_brk3': [500000, 500000, 500000, 500000, 500000],
110
- 'AGI_surtax_thd': [1000000, 1000000, 1000000, 1000000, 1000000],
111
- 'AGI_surtax_trt': 0.5,
112
- 'II_brk7': [1000000, 1000000, 1000000, 1000000, 1000000],
113
- 'II_em': 1000,
114
- 'ID_Casualty_hc': 0.1,
115
- 'ID_Miscellaneous_hc': 0.1,
116
- 'ID_prt': 0.03,
117
- 'ID_crt': 0.8,
118
- 'CR_Charity_rt': 0.4,
119
- 'CR_Charity_f': [5000, 5000, 5000, 5000, 5000],
120
- 'CR_Charity_frt': 0.5,
121
- 'CR_SchR_hc': 0.5
122
- }
123
- }
124
- return _reform_xx
125
-
126
-
127
- @pytest.fixture(scope='module', name='sorted_param_names')
128
- def fixture_sorted_param_names(allparams):
129
- """
130
- Fixture for storing a sorted parameter list
131
- """
132
- return sorted(list(allparams.keys()))
133
-
134
-
135
- NPARAMS = 219 # hard-code NPARAMS to len(allparams)
136
- BATCHSIZE = 10
137
- BATCHES = int(np.floor(NPARAMS / BATCHSIZE)) + 1
138
-
139
-
140
- @pytest.fixture(scope='module', name='allparams_batch',
141
- params=list(range(0, BATCHES)))
142
- def fixture_allparams_batch(request, allparams, sorted_param_names):
143
- """
144
- Fixture for grouping Tax-Calculator parameters
145
-
146
- Experiments indicated that there is some overhead when you run
147
- `test_compatible_data` on each parameter individually. Suppose it takes X
148
- amount of time to set up the test data for `test_compatible_data` and Y
149
- amount of time to run `test_compatible_data` on each parameter without
150
- parallelization. Then, if there is no overhead from parallelization, you
151
- would expect it to take Y + (X / NUMBER_WORKERS) to run these tests in
152
- parallel. Note that setup data is only created once if you set the
153
- fixture scope to 'module'. However, experiments indicated that there was
154
- so much overhead that the tests weren't that much faster in parallel than
155
- if they were run sequentially.
156
-
157
- I found that running the parameters in batches decreased the amount of
158
- overhead. Further, there was an optimal batch size that I found through
159
- trial and error. On my local machine, this was something like 10
160
- parameters. Others may find a different optimal batch size on their
161
- machines. Further, if the number of parameters changes, the optimal
162
- batch size could change, too.
163
-
164
- Math for partitioning the parameters:
165
-
166
- Suppose we have N parameters and choose batch size n. Then, we have
167
- B batches where B equals floor(N / n) + 1.
168
-
169
- Case 1: N % n = 0
170
- Then we have:
171
- idx_min = {i * b, i = 0, 1, 2, 3, ..., B - 1} and
172
- idx_max = {min((i + 1) * b, N), i = 0, 1, 2, 3, ..., B - 1}
173
-
174
- So, if i equals 0, the batch contains the first b - 1 parameters.
175
- Then, if i equals B, then idx_min is n * (B - 1) = N and idx_max is N and
176
- thus, the last batch is empty.
177
-
178
- Case 2: N % n = r > 0
179
- Then, everything is the same as case 1, except for the final batch.
180
- In the final batch, idx_min = b * (B - 1) = b * floor(N / n) < N, and
181
- idx_max is N. So, our final batch size is
182
- idx_max - idx_min = N - b * B = r.
183
-
184
- returns: dictionary of size, BATCHSIZE, or for the final batch,
185
- either an empty dictionary or dictionary of size NPARAMS mod BATCHSIZE
186
- """
187
- idx = request.param
188
- idx_start = idx * BATCHSIZE
189
- idx_end = min((idx + 1) * BATCHSIZE, NPARAMS)
190
- pnames = sorted_param_names[idx_start: idx_end]
191
- return {pname: allparams[pname] for pname in pnames}
192
-
193
-
194
- @pytest.fixture(scope='module', name='tc_objs',
195
- params=[True, False])
196
- def fixture_tc_objs(request, reform_xx, puf_subsample, cps_subsample):
197
- """
198
- Fixture for creating Tax-Calculator objects that use the PUF and
199
- use the CPS (called only twice: once for PUF and once for CPS)
200
- """
201
- puftest = request.param
202
- p_xx = Policy()
203
- p_xx.implement_reform(reform_xx, raise_errors=False)
204
- if puftest:
205
- rec_xx = Records(data=puf_subsample)
206
- else:
207
- rec_xx = Records.cps_constructor(data=cps_subsample)
208
- c_xx = Calculator(policy=p_xx, records=rec_xx)
209
- c_xx.advance_to_year(TEST_YEAR)
210
- c_xx.calc_all()
211
- return rec_xx, c_xx, puftest
212
-
213
-
214
- @pytest.mark.skip
215
- @pytest.mark.pre_release
216
- @pytest.mark.compatible_data
217
- @pytest.mark.requires_pufcsv
218
- def test_compatible_data(cps_subsample, puf_subsample,
219
- allparams, reform_xx,
220
- tc_objs, allparams_batch):
221
- """
222
- Test that the compatible_data attribute in policy_current_law.json
223
- is accurate by implementing the min and max values of each parameter
224
- as reforms and ensuring that revenue differs from baseline when for
225
- at least one of these reforms when using datasets marked compatible
226
- and does not differ when using datasets marked as incompatible.
227
- """
228
- # pylint: disable=too-many-arguments,too-many-positional-arguments
229
- # pylint: disable=too-many-statements,too-many-branches,too-many-locals
230
-
231
- # Check NPARAMS value
232
- assert NPARAMS == len(allparams)
233
-
234
- # Get taxcalc objects from tc_objs fixture
235
- rec_xx, c_xx, puftest = tc_objs
236
-
237
- # These parameters are exempt because they are not active under
238
- # current law and activating them would deactivate other parameters,
239
- # or if it is difficult to devise a test for them.
240
- exempt_from_testing = [
241
- 'CG_ec', 'CG_reinvest_ec_rt',
242
- 'II_prt', 'ID_prt', 'ID_crt',
243
- 'CR_SchR_hc', 'ACTC_ChildNum'
244
- ]
245
-
246
- # Loop through the parameters in allparams_batch
247
- errmsg = 'ERROR: {} {}\n'
248
- errors = ''
249
- for pname in allparams_batch:
250
- param = allparams_batch[pname]
251
- max_listed = param['valid_values']['max']
252
- # handle links to other params or self
253
- if isinstance(max_listed, str):
254
- if isinstance(allparams[max_listed]['value'][0], list):
255
- max_val = allparams[max_listed]['value'][0]
256
- else:
257
- max_val = float(allparams[max_listed]['value'][0])
258
- else:
259
- if isinstance(param['value'][0], list):
260
- max_val = [max_listed] * len(param['value'][0])
261
- else:
262
- max_val = max_listed
263
- min_listed = param['valid_values']['min']
264
- if isinstance(min_listed, str):
265
- if isinstance(allparams[min_listed]['value'][0], list):
266
- min_val = allparams[min_listed]['value'][0]
267
- else:
268
- min_val = float(allparams[min_listed]['value'][0])
269
- else:
270
- if isinstance(param['value'][0], list):
271
- min_val = [min_listed] * len(param['value'][0])
272
- else:
273
- min_val = min_listed
274
- # create reform dictionaries
275
- max_reform = copy.deepcopy(reform_xx)
276
- min_reform = copy.deepcopy(reform_xx)
277
- max_reform[XX_YEAR][str(pname)] = [max_val]
278
- min_reform[XX_YEAR][str(pname)] = [min_val]
279
- # assess whether max reform changes results
280
- if puftest:
281
- rec_yy = Records(data=puf_subsample)
282
- else:
283
- rec_yy = Records.cps_constructor(data=cps_subsample)
284
- p_yy = Policy()
285
- p_yy.implement_reform(max_reform, raise_errors=False)
286
- c_yy = Calculator(policy=p_yy, records=rec_yy, verbose=False)
287
- c_yy.advance_to_year(TEST_YEAR)
288
- c_yy.calc_all()
289
- if pname.startswith('BEN') and pname.endswith('_repeal'):
290
- max_reform_change = (
291
- c_yy.weighted_total('benefit_cost_total') -
292
- c_xx.weighted_total('benefit_cost_total')
293
- )
294
- else:
295
- max_reform_change = (
296
- c_yy.weighted_total('combined') -
297
- c_xx.weighted_total('combined')
298
- )
299
- min_reform_change = 0
300
- # assess whether min reform changes results, if max reform did not
301
- if max_reform_change == 0:
302
- p_yy = Policy()
303
- p_yy.implement_reform(min_reform, raise_errors=False)
304
- c_yy = Calculator(policy=p_yy, records=rec_xx)
305
- c_yy.advance_to_year(TEST_YEAR)
306
- c_yy.calc_all()
307
- if pname.startswith('BEN') and pname.endswith('_repeal'):
308
- min_reform_change = (
309
- c_yy.weighted_total('benefit_cost_total') -
310
- c_xx.weighted_total('benefit_cost_total')
311
- )
312
- else:
313
- min_reform_change = (
314
- c_yy.weighted_total('combined') -
315
- c_xx.weighted_total('combined')
316
- )
317
- if min_reform_change == 0 and pname not in exempt_from_testing:
318
- if puftest:
319
- if param['compatible_data']['puf'] is True:
320
- errors += errmsg.format(pname, 'is not True for puf')
321
- else:
322
- if param['compatible_data']['cps'] is True:
323
- errors += errmsg.format(pname, 'is not True for cps')
324
- if max_reform_change != 0 or min_reform_change != 0:
325
- if puftest:
326
- if param['compatible_data']['puf'] is False:
327
- errors += errmsg.format(pname, 'is not False for puf')
328
- else:
329
- if param['compatible_data']['cps'] is False:
330
- errors += errmsg.format(pname, 'is not False for cps')
331
- # test failure if any errors
332
- if errors:
333
- print(errors)
334
- assert False, 'ERROR: compatible_data is invalid; see errors above'
@@ -1,194 +0,0 @@
1
- """
2
- Test generates statistics for puf.csv variables.
3
- """
4
- # CODING-STYLE CHECKS:
5
- # pycodestyle test_puf_var_stats.py
6
- # pylint --disable=locally-disabled test_puf_var_stats.py
7
-
8
- import os
9
- import json
10
- import copy
11
- import numpy as np
12
- import pandas as pd
13
- import pytest
14
- from taxcalc.policy import Policy
15
- from taxcalc.records import Records
16
- from taxcalc.calculator import Calculator
17
-
18
-
19
- def create_base_table(test_path):
20
- """
21
- Create and return base table.
22
- """
23
- # specify calculated variable names and descriptions
24
- calc_dict = {'eitc': 'Federal EITC',
25
- 'iitax': 'Federal income tax liability',
26
- 'payrolltax': 'Payroll taxes (ee+er) for OASDI+HI',
27
- 'c00100': 'Federal AGI',
28
- 'c02500': 'OASDI benefits in AGI',
29
- 'c04600': 'Post-phase-out personal exemption',
30
- 'c21040': 'Itemized deduction that is phased out',
31
- 'c04470': 'Post-phase-out itemized deduction',
32
- 'c04800': 'Federal regular taxable income',
33
- 'c05200': 'Regular tax on taxable income',
34
- 'c07220': 'Child tax credit (adjusted)',
35
- 'c11070': 'Extra child tax credit (refunded)',
36
- 'c07180': 'Child care credit',
37
- 'c09600': 'Federal AMT liability'}
38
- # specify read variable names and descriptions
39
- unused_var_set = set(['DSI', 'EIC',
40
- 'h_seq', 'a_lineno', 'ffpos', 'fips', 'agi_bin',
41
- 'FLPDYR', 'FLPDMO', 'f2441', 'f3800', 'f6251',
42
- 'f8582', 'f8606', 'f8829', 'f8910', 'f8936', 'n20',
43
- 'n24', 'n25', 'n30', 'PREP', 'SCHB', 'SCHCF', 'SCHE',
44
- 'TFORM', 'IE', 'TXST', 'XFPT', 'XFST', 'XOCAH',
45
- 'XOCAWH', 'XOODEP', 'XOPAR', 'XTOT', 'MARS', 'MIDR',
46
- 'RECID', 'gender', 'wage_head', 'wage_spouse',
47
- 'earnsplit', 'agedp1', 'agedp2', 'agedp3',
48
- 's006', 's008', 's009', 'WSAMP', 'TXRT',
49
- 'matched_weight', 'e00200p', 'e00200s',
50
- 'e00900p', 'e00900s', 'e02100p', 'e02100s',
51
- 'age_head', 'age_spouse',
52
- 'nu18', 'n1820', 'n21',
53
- 'ssi_ben', 'snap_ben', 'other_ben',
54
- 'mcare_ben', 'mcaid_ben', 'vet_ben',
55
- 'housing_ben', 'tanf_ben', 'wic_ben',
56
- 'blind_head', 'blind_spouse',
57
- 'PT_SSTB_income',
58
- 'PT_binc_w2_wages',
59
- 'PT_ubia_property'])
60
- records_varinfo = Records(data=None)
61
- read_vars = list(records_varinfo.USABLE_READ_VARS - unused_var_set)
62
- # get read variable information from JSON file
63
- rec_vars_path = os.path.join(test_path, '..', 'records_variables.json')
64
- with open(rec_vars_path, 'r', encoding='utf-8') as rvfile:
65
- read_var_dict = json.load(rvfile)
66
- # create table_dict with sorted read vars followed by sorted calc vars
67
- table_dict = {}
68
- for var in sorted(read_vars):
69
- if "taxdata_puf" in read_var_dict['read'][var]['availability']:
70
- table_dict[var] = read_var_dict['read'][var]['desc']
71
- else:
72
- pass
73
- sorted_calc_vars = sorted(calc_dict.keys())
74
- for var in sorted_calc_vars:
75
- table_dict[var] = calc_dict[var]
76
- # construct DataFrame table from table_dict
77
- table = pd.DataFrame.from_dict(table_dict, orient='index')
78
- table.columns = ['description']
79
- return table
80
-
81
-
82
- def calculate_corr_stats(calc, table):
83
- """
84
- Calculate correlation coefficient matrix.
85
- """
86
- errmsg = ''
87
- for varname1 in table.index:
88
- var1 = calc.array(varname1)
89
- var1_cc = []
90
- for varname2 in table.index:
91
- var2 = calc.array(varname2)
92
- try:
93
- cor = np.corrcoef(var1, var2)[0][1]
94
- except FloatingPointError:
95
- msg = f'corr-coef error for {varname1} and {varname2}\n'
96
- errmsg += msg
97
- cor = 9.99 # because could not compute it
98
- var1_cc.append(cor)
99
- table[varname1] = var1_cc
100
- if errmsg:
101
- raise ValueError('\n' + errmsg)
102
-
103
-
104
- def calculate_mean_stats(calc, table, year):
105
- """
106
- Calculate weighted means for year.
107
- """
108
- total_weight = calc.total_weight()
109
- means = []
110
- for varname in table.index:
111
- wmean = calc.weighted_total(varname) / total_weight
112
- means.append(wmean)
113
- table[str(year)] = means
114
-
115
-
116
- def differences(new_filename, old_filename, stat_kind):
117
- """
118
- Return message string if differences detected by np.allclose();
119
- otherwise return empty string.
120
- """
121
- new_df = pd.read_csv(new_filename)
122
- old_df = pd.read_csv(old_filename)
123
- diffs = False
124
- if list(new_df.columns.values) == list(old_df.columns.values):
125
- for col in new_df.columns[1:]:
126
- if col == 'description':
127
- continue # skip description column
128
- if not np.allclose(new_df[col], old_df[col]):
129
- diffs = True
130
- else:
131
- diffs = True
132
- if diffs:
133
- new_name = os.path.basename(new_filename)
134
- old_name = os.path.basename(old_filename)
135
- msg = f'{stat_kind} RESULTS DIFFER:\n'
136
- msg += '-------------------------------------------------'
137
- msg += '-------------\n'
138
- msg += f'--- NEW RESULTS IN {new_name} FILE ---\n'
139
- msg += f'--- if new OK, copy {new_name} to\n'
140
- msg += f'--- {old_name} \n'
141
- msg += '--- and rerun test. '
142
- msg += '-------------------------------------------------'
143
- msg += '-------------\n'
144
- else:
145
- msg = ''
146
- os.remove(new_filename)
147
- return msg
148
-
149
-
150
- MEAN_FILENAME = 'puf_var_wght_means_by_year.csv'
151
- CORR_FILENAME = 'puf_var_correl_coeffs_2016.csv'
152
-
153
-
154
- @pytest.mark.requires_pufcsv
155
- def test_puf_var_stats(tests_path, puf_fullsample):
156
- """
157
- Main logic of test.
158
- """
159
- # create a baseline Policy object containing 2017_law.json parameters
160
- pre_tcja_jrf = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
161
- pre_tcja = Policy.read_json_reform(pre_tcja_jrf)
162
- baseline_policy = Policy()
163
- baseline_policy.implement_reform(pre_tcja)
164
- # create a Calculator object using baseline_policy and full puf.csv sample
165
- rec = Records(data=puf_fullsample)
166
- calc = Calculator(policy=baseline_policy, records=rec, verbose=False)
167
- # create base tables
168
- table_mean = create_base_table(tests_path)
169
- table_corr = copy.deepcopy(table_mean)
170
- del table_corr['description']
171
- # add statistics to tables
172
- year_headers = ['description']
173
- for year in range(Policy.JSON_START_YEAR, 2024 + 1):
174
- assert year == calc.current_year
175
- year_headers.append(str(year))
176
- calc.calc_all()
177
- calculate_mean_stats(calc, table_mean, year)
178
- if year == 2016:
179
- calculate_corr_stats(calc, table_corr)
180
- if year < 2034:
181
- calc.increment_year()
182
- # write tables to new CSV files
183
- mean_path = os.path.join(tests_path, MEAN_FILENAME + '-new')
184
- table_mean.sort_index(inplace=True)
185
- table_mean.to_csv(mean_path, header=year_headers, float_format='%8.0f')
186
- corr_path = os.path.join(tests_path, CORR_FILENAME + '-new')
187
- table_corr.sort_index(inplace=True)
188
- table_corr.to_csv(corr_path, float_format='%8.2f',
189
- columns=table_corr.index)
190
- # compare new and old CSV files for differences
191
- mean_msg = differences(mean_path, mean_path[:-4], 'MEAN')
192
- corr_msg = differences(corr_path, corr_path[:-4], 'CORR')
193
- if mean_msg or corr_msg:
194
- raise ValueError(mean_msg + corr_msg)