taxcalc 4.2.1__py3-none-any.whl → 4.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- taxcalc/__init__.py +1 -1
- taxcalc/assumptions/ASSUMPTIONS.md +53 -0
- taxcalc/assumptions/README.md +17 -0
- taxcalc/assumptions/economic_assumptions_template.json +77 -0
- taxcalc/calcfunctions.py +7 -4
- taxcalc/data.py +10 -5
- taxcalc/policy_current_law.json +2033 -184
- taxcalc/reforms/2017_law.json +125 -0
- taxcalc/reforms/2017_law.out.csv +10 -0
- taxcalc/reforms/ARPA.json +78 -0
- taxcalc/reforms/ARPA.out.csv +10 -0
- taxcalc/reforms/BrownKhanna.json +23 -0
- taxcalc/reforms/BrownKhanna.out.csv +10 -0
- taxcalc/reforms/CARES.json +40 -0
- taxcalc/reforms/CARES.out.csv +10 -0
- taxcalc/reforms/ConsolidatedAppropriationsAct2021.json +15 -0
- taxcalc/reforms/ConsolidatedAppropriationsAct2021.out.csv +10 -0
- taxcalc/reforms/Larson2019.json +36 -0
- taxcalc/reforms/Larson2019.out.csv +10 -0
- taxcalc/reforms/README.md +22 -0
- taxcalc/reforms/REFORMS.md +92 -0
- taxcalc/reforms/Renacci.json +61 -0
- taxcalc/reforms/Renacci.out.csv +10 -0
- taxcalc/reforms/SandersDeFazio.json +15 -0
- taxcalc/reforms/SandersDeFazio.out.csv +10 -0
- taxcalc/reforms/TCJA.json +160 -0
- taxcalc/reforms/TCJA.md +48 -0
- taxcalc/reforms/TCJA.out.csv +10 -0
- taxcalc/reforms/Trump2016.json +71 -0
- taxcalc/reforms/Trump2016.out.csv +10 -0
- taxcalc/reforms/Trump2017.json +51 -0
- taxcalc/reforms/Trump2017.out.csv +10 -0
- taxcalc/reforms/archive/Clinton2016.json +56 -0
- taxcalc/reforms/archive/RyanBrady.json +104 -0
- taxcalc/reforms/archive/TCJA_House.json +144 -0
- taxcalc/reforms/archive/TCJA_House_Amended.json +152 -0
- taxcalc/reforms/archive/TCJA_Reconciliation.json +187 -0
- taxcalc/reforms/archive/TCJA_Senate.json +116 -0
- taxcalc/reforms/archive/TCJA_Senate_111417.json +169 -0
- taxcalc/reforms/archive/TCJA_Senate_120117.json +174 -0
- taxcalc/reforms/cases.csv +10 -0
- taxcalc/reforms/clp.out.csv +10 -0
- taxcalc/reforms/ext.json +59 -0
- taxcalc/reforms/growfactors_ext.csv +65 -0
- taxcalc/reforms/ptaxes0.json +37 -0
- taxcalc/reforms/ptaxes0.out.csv +10 -0
- taxcalc/reforms/ptaxes1.json +21 -0
- taxcalc/reforms/ptaxes1.out.csv +10 -0
- taxcalc/reforms/ptaxes2.json +18 -0
- taxcalc/reforms/ptaxes2.out.csv +10 -0
- taxcalc/reforms/ptaxes3.json +28 -0
- taxcalc/reforms/ptaxes3.out.csv +10 -0
- taxcalc/reforms/rounding2022.json +153 -0
- taxcalc/reforms/rounding2022.out.csv +10 -0
- taxcalc/tests/benefits_expect.csv +169 -0
- taxcalc/tests/cmpi_cps_expect.txt +132 -0
- taxcalc/tests/cmpi_puf_expect.txt +132 -0
- taxcalc/tests/conftest.py +143 -0
- taxcalc/tests/cpscsv_agg_expect.csv +26 -0
- taxcalc/tests/puf_var_correl_coeffs_2016.csv +80 -0
- taxcalc/tests/puf_var_wght_means_by_year.csv +80 -0
- taxcalc/tests/pufcsv_agg_expect.csv +26 -0
- taxcalc/tests/pufcsv_mtr_expect.txt +63 -0
- taxcalc/tests/reforms.json +649 -0
- taxcalc/tests/reforms_expect.csv +65 -0
- taxcalc/tests/test_4package.py +67 -0
- taxcalc/tests/test_benefits.py +86 -0
- taxcalc/tests/test_calcfunctions.py +871 -0
- taxcalc/tests/test_calculator.py +1021 -0
- taxcalc/tests/test_compare.py +336 -0
- taxcalc/tests/test_compatible_data.py +338 -0
- taxcalc/tests/test_consumption.py +144 -0
- taxcalc/tests/test_cpscsv.py +163 -0
- taxcalc/tests/test_data.py +133 -0
- taxcalc/tests/test_decorators.py +332 -0
- taxcalc/tests/test_growdiff.py +102 -0
- taxcalc/tests/test_growfactors.py +94 -0
- taxcalc/tests/test_parameters.py +617 -0
- taxcalc/tests/test_policy.py +1575 -0
- taxcalc/tests/test_puf_var_stats.py +194 -0
- taxcalc/tests/test_pufcsv.py +385 -0
- taxcalc/tests/test_records.py +234 -0
- taxcalc/tests/test_reforms.py +385 -0
- taxcalc/tests/test_responses.py +41 -0
- taxcalc/tests/test_taxcalcio.py +755 -0
- taxcalc/tests/test_tmdcsv.py +38 -0
- taxcalc/tests/test_utils.py +792 -0
- taxcalc/tmd_growfactors.csv +54 -54
- taxcalc/tmd_weights.csv.gz +0 -0
- taxcalc/validation/CSV_INPUT_VARS.md +29 -0
- taxcalc/validation/CSV_OUTPUT_VARS.md +63 -0
- taxcalc/validation/README.md +68 -0
- taxcalc/validation/taxsim35/Differences_Explained.md +54 -0
- taxcalc/validation/taxsim35/README.md +139 -0
- taxcalc/validation/taxsim35/expected_differences/a17-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/a18-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/a19-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/a20-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/a21-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/b17-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/b18-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/b19-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/b20-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/b21-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/c17-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/c18-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/c19-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/input_setup.py +67 -0
- taxcalc/validation/taxsim35/main_comparison.py +183 -0
- taxcalc/validation/taxsim35/prepare_taxcalc_input.py +161 -0
- taxcalc/validation/taxsim35/process_taxcalc_output.py +140 -0
- taxcalc/validation/taxsim35/taxsim_emulation.json +49 -0
- taxcalc/validation/taxsim35/taxsim_input.py +321 -0
- taxcalc/validation/taxsim35/tc_sims.py +98 -0
- taxcalc/validation/taxsim35/tests_35.py +80 -0
- taxcalc/validation/tests_35.sh +13 -0
- {taxcalc-4.2.1.dist-info → taxcalc-4.2.2.dist-info}/METADATA +3 -4
- taxcalc-4.2.2.dist-info/RECORD +144 -0
- {taxcalc-4.2.1.dist-info → taxcalc-4.2.2.dist-info}/WHEEL +1 -1
- taxcalc-4.2.1.dist-info/RECORD +0 -34
- {taxcalc-4.2.1.dist-info → taxcalc-4.2.2.dist-info}/LICENSE +0 -0
- {taxcalc-4.2.1.dist-info → taxcalc-4.2.2.dist-info}/entry_points.txt +0 -0
- {taxcalc-4.2.1.dist-info → taxcalc-4.2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,140 @@
|
|
1
|
+
"""
|
2
|
+
Translates tc --dump output file into file formatted like TAXSIM-35 output.
|
3
|
+
"""
|
4
|
+
|
5
|
+
# CODING-STYLE CHECKS:
|
6
|
+
# pycodestyle process_taxcalc_output.py
|
7
|
+
# pylint --disable=locally-disabled process_taxcalc_output.py
|
8
|
+
|
9
|
+
import argparse
|
10
|
+
import os
|
11
|
+
import sys
|
12
|
+
import pandas as pd
|
13
|
+
|
14
|
+
|
15
|
+
def main(input_file_name, output_file_name):
    """
    Translates tc --dump output file into an output file that is
    formatted like the first 28 variables in TAXSIM-35 output.  The INPUT
    file contains the output generated by running tc with the --dump
    option.  Any pre-existing OUTPUT file contents will be overwritten.
    For details on Internet TAXSIM version 35 OUTPUT format, go to
    https://users.nber.org/~taxsim/taxsim35/

    Args:
        input_file_name (string): name of file with taxcalc formatted output
        output_file_name (string): name of file with taxsim formatted output

    Returns:
        int: 0 (the no-error exit code) when the OUTPUT file is written

    Raises:
        AssertionError: if either file name is empty or if the INPUT
            file does not exist
    """
    # usage text shown in error messages (this module has no usage string
    # defined at module level)
    usage_str = "main(INPUT, OUTPUT)"
    # check INPUT filename
    if input_file_name == "":
        sys.stderr.write("ERROR: must specify INPUT file name\n")
        sys.stderr.write("USAGE: {}\n".format(usage_str))
        # raise explicitly so the check survives python -O
        raise AssertionError("must specify INPUT file name")
    if not os.path.isfile(input_file_name):
        emsg = "INPUT file named {} does not exist".format(input_file_name)
        sys.stderr.write("ERROR: {}\n".format(emsg))
        raise AssertionError(emsg)
    # check OUTPUT filename
    if output_file_name == "":
        sys.stderr.write("ERROR: must specify OUTPUT file name\n")
        sys.stderr.write("USAGE: {}\n".format(usage_str))
        raise AssertionError("must specify OUTPUT file name")
    if os.path.isfile(output_file_name):
        os.remove(output_file_name)
    # read INPUT file into a pandas DataFrame
    tcvar = pd.read_csv(input_file_name)
    # write OUTPUT file using the pandas DataFrame
    write_taxsim_formatted_output(output_file_name, tcvar)
    # return no-error exit code
    return 0
|
53
|
+
|
54
|
+
|
55
|
+
# end of main function code
|
56
|
+
|
57
|
+
|
58
|
+
def write_taxsim_formatted_output(filename, tcvar):
    """
    Write contents of tcvar pandas DataFrame to filename as a CSV file
    containing the 28 Tax-Calculator variables selected below, ordered
    to line up with TAXSIM-35 output, with values rounded to two decimals.

    Note that the derived columns computed here (state, statetax,
    mtr_state, and so on) are added to the passed-in tcvar DataFrame.

    Args:
        filename (string): name of the CSV file to write
        tcvar (pandas DataFrame): contents of a tc --dump output file

    Returns:
        None
    """
    assert isinstance(tcvar, pd.DataFrame)
    tcvar["state"] = 0  # state code is always zero
    tcvar["statetax"] = 0.0  # no state income tax calculation
    tcvar["mtr_state"] = 0.0  # no state income tax calculation
    tcvar["zero_bracket_amount"] = 0.0  # always set to zero (not written)
    pre_phase_out_pe = tcvar["pre_c04600"].values
    post_phase_out_pe = tcvar["c04600"].values
    phased_out_pe = pre_phase_out_pe - post_phase_out_pe
    # post-phase-out personal exemption
    tcvar["post_phase_out_pe"] = post_phase_out_pe
    # personal exemption that is phased out
    tcvar["phased_out_pe"] = phased_out_pe
    tcvar["exemption_surtax"] = 0.0  # always set exemption surtax to zero
    tcvar["gen_tax_credit"] = 0.0  # always set general tax credit to zero
    # non-refundable child+odep credit
    tcvar["non_refundable_child_odep_credit"] = (
        tcvar["c07220"] + tcvar["odc"] + tcvar["ctc_new"]
    )
    tcvar["refundable_CDCC"] = tcvar["CDCC_refund"]  # refundable CDCC
    tcvar["amt_liability"] = tcvar["c09600"]  # federal AMT liability
    # var28 from TAXSIM-35 is federal income tax before credits; the
    # Tax-Calculator tcvar['c05800'] is this concept but includes AMT
    # liability while Internet-TAXSIM var28 explicitly excludes AMT
    # liability, so we have the following:
    tcvar["iitax_before_credits_ex_AMT"] = (
        tcvar["c05800"] - tcvar["amt_liability"]
    )
    output_columns = [
        "RECID",
        "FLPDYR",
        "state",
        "iitax",
        "statetax",
        "payrolltax",
        "mtr_inctax",
        "mtr_state",
        # 'mtr_paytax' is deliberately not written
        "c00100",
        "e02300",
        "c02500",
        # 'zero_bracket_amount' is deliberately not written
        "post_phase_out_pe",
        "phased_out_pe",
        "c21040",
        "c04470",
        "c04800",
        "taxbc",
        "exemption_surtax",
        "gen_tax_credit",
        "non_refundable_child_odep_credit",
        "c11070",
        "c07180",
        "refundable_CDCC",
        "eitc",
        "c62100",
        "amt_liability",
        "iitax_before_credits_ex_AMT",
        "recovery_rebate_credit",
    ]
    # take an explicit copy so the .loc assignments below modify the
    # output frame itself rather than a slice of the input frame
    outvar = tcvar[output_columns].copy()
    # better mapping to how TAXSIM-35 handles refundable credits in 2021
    is_2021 = outvar["FLPDYR"] == 2021
    outvar.loc[is_2021, "c11070"] = outvar.loc[
        is_2021, "non_refundable_child_odep_credit"
    ]
    outvar.loc[is_2021, "c07180"] = outvar.loc[is_2021, "refundable_CDCC"]
    # DataFrame.round returns a new frame, so its result must be kept
    outvar = outvar.round(decimals=2)
    outvar.to_csv(filename)
|
@@ -0,0 +1,49 @@
|
|
1
|
+
// JSON "reform" file that specifies changes in current-law policy that
|
2
|
+
// are required to make Tax-Calculator work like TAXSIM-35.
|
3
|
+
//
|
4
|
+
// (1) AMT_child_em_c_age = 24 (rather than 18)
|
5
|
+
// Whether to set this parameter to 18 or 24 is arbitrary because
|
6
|
+
// neither model has enough information to apply correctly the child
|
7
|
+
// AMT exemption rules. Information on full-time student status and
|
8
|
+
// whether taxpayers provide more than half of their support are required
|
9
|
+
// to apply the rules correctly. Tax-Calculator makes the arbitrary
|
10
|
+
// assumption that only those under 18 are required to use the child
|
11
|
+
// AMT exemption rules, while TAXSIM-27 makes the arbitrary assumption
|
12
|
+
// that all those under 24 are required to use the child AMT exemption.
|
13
|
+
// (This change was introduced for assumption set b and higher.)
|
14
|
+
//
|
15
|
+
// (2) EITC_excess_InvestIncome_rt = 1.0 (rather than 9e99)
|
16
|
+
// The rate at which the EITC amount is reduced per dollar of investment
|
17
|
+
// income in excess of the EITC investment income ceiling is infinity under
|
18
|
+
// current law (that is, any investment income in excess of the ceiling
|
19
|
+
// causes EITC ineligibility). However, TAXSIM-27 assumes it is one, so
|
20
|
+
// that the EITC amount is reduced a dollar for each dollar of excess
|
21
|
+
// investment income. This difference in the parameter value leads to
|
22
|
+
// many EITC differences in the randomly-generated validation samples,
|
23
|
+
// with some of the differences being in the thousands of dollars. This
|
24
|
+
// non-current-law assumption in TAXSIM-27 is presumably made to reduce
|
25
|
+
// the magnitude of model-calculated marginal tax rates with respect to
|
26
|
+
// investment income in cases where a marginal increase in investment
|
27
|
+
// income takes a filing unit above the ceiling.
|
28
|
+
|
29
|
+
// (3) ALD_AlimonyReceived_hc = 1.0 (rather than 0)
|
30
|
+
// TAXSIM35 nonproperty income is mapped into AlimonyReceived
|
31
|
+
// which had its haircut change from 1.0 to 0.0, a change that
|
32
|
+
// TAXSIM35 has not implemented for good reason given this:
|
33
|
+
// IRS: 'Beginning Jan. 1, 2019, alimony or separate
|
34
|
+
// maintenance payments are not deductible from the
|
35
|
+
// income of the payer spouse, or includable in the
|
36
|
+
// income of the receiving spouse, if made under a
|
37
|
+
// divorce or separation agreement executed after
|
38
|
+
// Dec. 31, 2018.'
|
39
|
+
|
40
|
+
// (4) PT_qbid_limit_switch = false implies TAXSIM35-like QBI deduction logic.
|
41
|
+
{
|
42
|
+
"AMT_child_em_c_age": {"2013": 24},
|
43
|
+
|
44
|
+
"EITC_excess_InvestIncome_rt": {"2013": 1.0},
|
45
|
+
|
46
|
+
"ALD_AlimonyReceived_hc": {"2019": 1.0},
|
47
|
+
|
48
|
+
"PT_qbid_limit_switch": {"2018": false}
|
49
|
+
}
|
@@ -0,0 +1,321 @@
|
|
1
|
+
"""
|
2
|
+
Generates random sample of tax filing units with attributes such that
|
3
|
+
generated file can be directly uploaded to Internet TAXSIM version 35.
|
4
|
+
"""
|
5
|
+
|
6
|
+
# CODING-STYLE CHECKS:
|
7
|
+
# pycodestyle taxsim_input.py
|
8
|
+
# pylint --disable=locally-disabled taxsim_input.py
|
9
|
+
|
10
|
+
import argparse
|
11
|
+
import sys
|
12
|
+
import numpy as np
|
13
|
+
import pandas as pd
|
14
|
+
|
15
|
+
|
16
|
+
VALID_LETTERS = ["a", "b", "c"]
|
17
|
+
|
18
|
+
|
19
|
+
def generate_datasets(letter, year, offset=0):
    """
    Generates random sample of tax filing units with attributes and
    format such that the file can be directly uploaded to Internet
    TAXSIM version 35.  For details on Internet TAXSIM version 35 INPUT
    format, go to https://users.nber.org/~taxsim/taxsim35/

    The sample is written to a CSV file named <letter><YY>.in in the
    current working directory.

    Args:
        letter (character): letter denoting assumption set to generate data
        year (int): two-digit year (YY) data will represent; 2000 is added
            to it, and the result must be in the [2013,2023] range
        offset (int): offset to alter the random number seed; must be in
            the [0,999] range

    Returns:
        None

    Raises:
        AssertionError: if year, letter, or offset is not valid
    """
    # check year value
    year += 2000
    if year < 2013 or year > 2023:
        sys.stderr.write("ERROR: YEAR not in [2013,2023] range\n")
        # raise explicitly so the check survives python -O
        raise AssertionError("YEAR not in [2013,2023] range")
    # check LETTER value
    if letter == "":
        sys.stderr.write("ERROR: must specify LETTER\n")
        raise AssertionError("must specify LETTER")
    if letter not in VALID_LETTERS:
        sys.stderr.write("ERROR: LETTER not in VALID_LETTERS, where\n")
        sys.stderr.write("       VALID_LETTERS={}\n".format(VALID_LETTERS))
        raise AssertionError("LETTER not in VALID_LETTERS")
    # check OFFSET value
    if offset < 0 or offset > 999:
        sys.stderr.write("ERROR: OFFSET not in [0,999] range\n")
        raise AssertionError("OFFSET not in [0,999] range")
    # get dictionary containing assumption set
    assump = assumption_set(letter, year)
    # generate sample as pandas DataFrame
    sample = sample_dataframe(assump, year, offset)
    # write sample to input file using TAXSIM-35 input variable names,
    # listed in the same order as the sample's columns
    header_col = [
        "taxsimid",
        "year",
        "state",
        "mstat",
        "page",
        "sage",
        "depx",
        "dep13",
        "dep17",
        "dep18",
        "pwages",
        "swages",
        "psemp",
        "ssemp",
        "dividends",
        "intrec",
        "stcg",
        "ltcg",
        "otherprop",
        "nonprop",
        "pensions",
        "gssi",
        "pui",
        "sui",
        "transfers",
        "rentpaid",
        "proptax",
        "otheritem",
        "childcare",
        "mortgage",
        "scorp",
        "pbusinc",
        "pprofinc",
        "sbusinc",
        "sprofinc",
        "idtl",
    ]
    filename = "{}{}.in".format(letter, year % 100)
    sample.to_csv(filename, sep=",", header=header_col, index=False)
|
96
|
+
|
97
|
+
|
98
|
+
def assumption_set(letter, year):
    """
    Build the dictionary of sampling parameters for an assumption set.

    Every valid letter gets the basic parameters (wage income only);
    letters "b" and "c" then override the non-labor income maxima, and
    letter "c" additionally overrides the childcare and itemized
    expense maxima.

    Args:
        letter (character): letter denoting assumption set to generate data
        year (int): year data will represent

    Returns:
        params (dict): assumption set dictionary (defines sampling for
            variables); empty when letter matches none of the sets
    """
    params = {}
    if letter in VALID_LETTERS:
        params.update(
            {
                # basic assumption parameters for all ?YY.in samples:
                "sample_size": 1000,  # 100000
                "year": year,  # TAXSIM ivar 2
                # demographic attributes:
                "joint_frac": 0.60,  # fraction of sample with joint MARS
                "min_age": 17,  # TAXSIM ivar 5 (primary taxpayer age)
                "max_age": 77,  # TAXSIM ivar 5 (primary taxpayer age)
                "min_age_diff": -10,  # min spouse age difference
                "max_age_diff": 10,  # max spouse age difference
                "max_depx": 5,  # TAXSIM ivar 7 (total number of dependents)
                "max_dep13": 4,  # TAXSIM ivar 8
                "max_dep17": 4,  # TAXSIM ivar 9
                "max_dep18": 4,  # TAXSIM ivar 10
                # labor income:
                "max_pwages_yng": 500,  # TAXSIM ivar 11
                "max_pwages_old": 30,  # TAXSIM ivar 11 (65+ ==> old)
                "max_swages_yng": 500,  # TAXSIM ivar 12
                "max_swages_old": 30,  # TAXSIM ivar 12 (65+ ==> old)
                # non-labor income (all zeros):
                "max_psemp": 0,  # TAXSIM ivar 13
                "max_ssemp": 0,  # TAXSIM ivar 14
                "max_divinc": 0,  # TAXSIM ivar 15
                "max_intinc": 0,  # TAXSIM ivar 16
                "min_stcg": 0,  # TAXSIM ivar 17
                "max_stcg": 0,  # TAXSIM ivar 17
                "min_ltcg": 0,  # TAXSIM ivar 18
                "max_ltcg": 0,  # TAXSIM ivar 18
                "max_other_prop_inc": 0,  # TAXSIM ivar 19
                "max_other_nonprop_inc": 0,  # TAXSIM ivar 20
                "max_pnben": 0,  # TAXSIM ivar 21
                "max_ssben": 0,  # TAXSIM ivar 22
                "max_puiben": 0,  # TAXSIM ivar 23
                "max_suiben": 0,  # TAXSIM ivar 24
                # childcare expense amount (all zero):
                "max_ccexp": 0,  # TAXSIM ivar 29
                # itemized expense amounts (all zero):
                "max_ided_proptax": 0,  # TAXSIM ivar 27
                "max_ided_nopref": 0,  # TAXSIM ivar 28
                "max_ided_mortgage": 0,  # TAXSIM ivar 30
                "max_scorp_inc": 0,  # TAXSIM ivar 31
                "max_pbus_inc": 0,  # TAXSIM ivar 32
                "max_pprof_inc": 0,  # TAXSIM ivar 33
                "max_sbus_inc": 0,  # TAXSIM ivar 34
                "max_sprof_inc": 0,  # TAXSIM ivar 35
            }
        )
    if letter in ("b", "c"):
        params.update(
            {
                # non-labor income:
                "max_psemp": 350,  # TAXSIM ivar 13
                "max_ssemp": 350,  # TAXSIM ivar 14
                "max_divinc": 20,  # TAXSIM ivar 15
                "max_intinc": 20,  # TAXSIM ivar 16
                "min_stcg": -10,  # TAXSIM ivar 17
                "max_stcg": 10,  # TAXSIM ivar 17
                "min_ltcg": -10,  # TAXSIM ivar 18
                "max_ltcg": 10,  # TAXSIM ivar 18
                "max_other_prop_inc": 30,  # TAXSIM ivar 19
                "max_other_nonprop_inc": 30,  # TAXSIM ivar 20
                "max_pnben": 60,  # TAXSIM ivar 21
                "max_ssben": 60,  # TAXSIM ivar 22
                "max_puiben": 10,  # TAXSIM ivar 23
                "max_suiben": 10,  # TAXSIM ivar 24
                "max_scorp_inc": 350,  # TAXSIM ivar 31
                "max_pbus_inc": 350,  # TAXSIM ivar 32
                "max_pprof_inc": 0,  # 1  # TAXSIM ivar 33
                "max_sbus_inc": 350,  # TAXSIM ivar 34
                "max_sprof_inc": 0,  # 1  # TAXSIM ivar 35
            }
        )
    if letter == "c":
        params.update(
            {
                # childcare expense amount:
                "max_ccexp": 10,  # TAXSIM ivar 29
                # itemized expense amounts:
                "max_ided_proptax": 30,  # TAXSIM ivar 27
                "max_ided_nopref": 10,  # TAXSIM ivar 28
                "max_ided_mortgage": 40,  # TAXSIM ivar 30
            }
        )
    return params
|
186
|
+
|
187
|
+
|
188
|
+
def sample_dataframe(assump, year, offset):
    """
    Construct DataFrame containing sample specified by assump and year+offset.

    The random number generator is seeded with 123456789 + year + offset,
    so the same arguments always produce the same sample.  The order of
    the random draws below therefore determines the generated values and
    must not be changed.

    Args:
        assump (dict): assumption set dictionary (defines sampling for
            variables)
        year (int): year data will represent
        offset (int): offset to alter the random number seed

    Returns:
        smpl (Pandas DataFrame): random data whose columns are labelled
            with the integers 1 through 36 (TAXSIM-35 input variable
            numbers)
    """
    # pylint: disable=too-many-locals
    np.random.seed(123456789 + year + offset)
    size = assump["sample_size"]
    zero = np.zeros(size, dtype=np.int64)
    sdict = {}
    # (01) RECID
    sdict[1] = range(1, size + 1)
    # (02) YEAR
    sdict[2] = np.full_like(zero, assump["year"], dtype=np.int64)
    # (03) STATE
    sdict[3] = zero
    # (04) MSTAT (2 for joint filers, 1 otherwise)
    urn = np.random.random(size)
    mstat = np.where(urn < assump["joint_frac"], 2, 1)
    sdict[4] = mstat
    # (05) PAGE
    sdict[5] = np.random.randint(
        assump["min_age"], assump["max_age"] + 1, size
    )
    # (06) SAGE (zero for non-joint filers, never below min_age otherwise)
    age_diff = np.random.randint(
        assump["min_age_diff"], assump["max_age_diff"] + 1, size
    )
    sage = sdict[5] + age_diff
    sdict[6] = np.where(mstat == 2, np.maximum(sage, assump["min_age"]), zero)
    # (07-10) DEPX, DEP13, DEP17, DEP18
    # each narrower count is capped by the next wider one, so that
    # dep13 <= dep17 <= dep18 <= depx
    depx = np.random.randint(0, assump["max_depx"] + 1, size)
    d18 = np.random.randint(0, assump["max_dep18"] + 1, size)
    dep18 = np.minimum(d18, depx)
    d17 = np.random.randint(0, assump["max_dep17"] + 1, size)
    dep17 = np.minimum(d17, dep18)
    d13 = np.random.randint(0, assump["max_dep13"] + 1, size)
    dep13 = np.minimum(d13, dep17)
    sdict[7] = depx
    # (8)-(10) are ages of 3 youngest dependents
    # If these are zero, then use depx for number of EIC children
    # but TAXSIM-35 also accepts dep13-dep18 here to be backward compatible
    # we use that since closer to what's in tax-calculator
    sdict[8] = dep13
    sdict[9] = dep17
    sdict[10] = dep18
    # (11) PWAGES
    pwages_yng = np.random.randint(0, assump["max_pwages_yng"] + 1, size)
    pwages_old = np.random.randint(0, assump["max_pwages_old"] + 1, size)
    sdict[11] = np.where(sdict[5] >= 65, pwages_old, pwages_yng) * 1000
    # (12) SWAGES
    swages_yng = np.random.randint(0, assump["max_swages_yng"] + 1, size)
    swages_old = np.random.randint(0, assump["max_swages_old"] + 1, size)
    swages = np.where(sdict[6] >= 65, swages_old, swages_yng) * 1000
    sdict[12] = np.where(mstat == 2, swages, zero)
    # (13) Primary Filer Self-Employment Income
    # NOTE(review): unlike the other income amounts, (13) and (14) are
    # not multiplied by 1000 -- confirm that this is intended
    sdict[13] = np.random.randint(0, assump["max_psemp"] + 1, size)
    # (14) Secondary Filer Self-Employment Income
    # (drawn using the spouse maximum, max_ssemp, not the primary one)
    ssemp = np.random.randint(0, assump["max_ssemp"] + 1, size)
    sdict[14] = np.where(mstat == 2, ssemp, zero)
    # (15) DIVIDENDS
    sdict[15] = np.random.randint(0, assump["max_divinc"] + 1, size) * 1000
    # (16) INTREC
    sdict[16] = np.random.randint(0, assump["max_intinc"] + 1, size) * 1000
    # (17) STCG
    sdict[17] = (
        np.random.randint(assump["min_stcg"], assump["max_stcg"] + 1, size)
        * 1000
    )
    # (18) LTCG
    sdict[18] = (
        np.random.randint(assump["min_ltcg"], assump["max_ltcg"] + 1, size)
        * 1000
    )
    # (19) OTHERPROP
    sdict[19] = (
        np.random.randint(0, assump["max_other_prop_inc"] + 1, size) * 1000
    )
    # (20) NONPROP
    sdict[20] = (
        np.random.randint(0, assump["max_other_nonprop_inc"] + 1, size) * 1000
    )
    # (21) PENSIONS
    sdict[21] = np.random.randint(0, assump["max_pnben"] + 1, size) * 1000
    # (22) GSSI
    sdict[22] = np.random.randint(0, assump["max_ssben"] + 1, size) * 1000
    # (23) Primary Filer UI (note splitting UI between primary and
    # secondary only matters for 2020 and 2021)
    sdict[23] = np.random.randint(0, assump["max_puiben"] + 1, size) * 1000
    # (24) Secondary Filer UI
    sdict[24] = np.random.randint(0, assump["max_suiben"] + 1, size) * 1000
    # (25) TRANSFERS (non-taxable in federal income tax)
    sdict[25] = zero
    # (26) RENTPAID (used only in some state income tax laws)
    sdict[26] = zero
    # (27) PROPTAX
    sdict[27] = (
        np.random.randint(0, assump["max_ided_proptax"] + 1, size) * 1000
    )
    # (28) OTHERITEM
    sdict[28] = (
        np.random.randint(0, assump["max_ided_nopref"] + 1, size) * 1000
    )
    # (29) CHILDCARE (TAXSIM-35 EXPECTS ZERO IF NO QUALIFYING CHILDREN)
    ccexp = np.random.randint(0, assump["max_ccexp"] + 1, size) * 1000
    sdict[29] = np.where(dep13 > 0, ccexp, zero)
    # (30) MORTGAGE
    sdict[30] = (
        np.random.randint(0, assump["max_ided_mortgage"] + 1, size) * 1000
    )
    # (31) S-Corp income, QBI
    sdict[31] = np.random.randint(0, assump["max_scorp_inc"] + 1, size) * 1000
    # (32) Primary Taxpayer's QBI
    sdict[32] = np.random.randint(0, assump["max_pbus_inc"] + 1, size) * 1000
    # (33) Primary Taxpayer's SSTB
    sdict[33] = np.random.randint(0, assump["max_pprof_inc"] + 1, size)
    # (34) Spouse's QBI
    sqbi = np.random.randint(0, assump["max_sbus_inc"] + 1, size) * 1000
    sdict[34] = np.where(mstat == 2, sqbi, zero)
    # (35) Spouse's SSTB
    spouse_sstb = np.random.randint(0, assump["max_sprof_inc"] + 1, size)
    sdict[35] = np.where(mstat == 2, spouse_sstb, zero)
    # (36) IDTL: variable to request intermediate calculations
    sdict[36] = 2

    smpl = pd.DataFrame(sdict)
    return smpl
|
@@ -0,0 +1,98 @@
|
|
1
|
+
import os
|
2
|
+
import sys
|
3
|
+
import shutil
|
4
|
+
import prepare_taxcalc_input as ptci
|
5
|
+
import process_taxcalc_output as ptco
|
6
|
+
|
7
|
+
CURR_PATH = os.path.abspath(os.path.dirname(__file__))
|
8
|
+
|
9
|
+
|
10
|
+
# prepare Tax-Calculator input file
|
11
|
+
def prep_tc_input(letter, year):
    """
    Translate the TAXSIM-35 input file <letter><year>.in into the
    Tax-Calculator input file <letter><year>.in.csv by calling
    prepare_taxcalc_input.main.

    Args:
        letter (character): letter denoting assumption set to generate data
        year (int): year data will represent

    Returns:
        None
    """
    source_name = letter + str(year) + ".in"
    ptci.main(source_name, source_name + ".csv")
|
23
|
+
|
24
|
+
|
25
|
+
# calculate Tax-Calculator output
|
26
|
+
def calc_tc_output(letter, year):
    """
    Run the Tax-Calculator tc CLI on <letter><year>.in.csv using the
    taxsim_emulation.json reform and the --dump option, rename the dump
    file to <letter><year>.in.out.csv, and delete the documentation file
    that tc writes alongside it.  All files are read and written in the
    directory that contains this module.

    Args:
        letter (character): letter denoting assumption set to generate data
        year (int): two-digit year (YY) data will represent

    Returns:
        None

    Raises:
        SystemExit: if the Tax-Calculator input file does not exist
        subprocess.CalledProcessError: if tc exits with a non-zero status
    """
    # imported locally so this function does not depend on a change to
    # the module-level imports
    import subprocess

    yy = str(year)
    taxsim_in = letter + yy + ".in"
    taxsim_in_csv = taxsim_in + ".csv"
    taxsim_out_csv = taxsim_in + ".out.csv"
    tc_input_path = os.path.join(CURR_PATH, taxsim_in_csv)
    if not os.path.exists(tc_input_path):
        sys.exit("ERROR: {} is not a valid path".format(tc_input_path))
    # pass the command as an argument list (no shell) and run it in
    # CURR_PATH, which is where the input file was just checked for and
    # where the output files are looked for below
    command = [
        "tc",
        taxsim_in_csv,
        "20" + yy,
        "--reform",
        "taxsim_emulation.json",
        "--dump",
    ]
    # check=True stops here if tc fails, rather than failing later with
    # a confusing missing-file error
    subprocess.run(command, cwd=CURR_PATH, check=True)

    # rename the dump file written by tc
    file_temp = taxsim_in + "-" + yy + "-#-taxsim_emulation-#.csv"
    file_temp_path = os.path.join(CURR_PATH, file_temp)
    file_out_path = os.path.join(CURR_PATH, taxsim_out_csv)
    shutil.move(file_temp_path, file_out_path)

    # delete the reform documentation file written by tc
    file_temp2 = taxsim_in + "-" + yy + "-#-taxsim_emulation-#-doc.text"
    os.remove(os.path.join(CURR_PATH, file_temp2))
|
58
|
+
|
59
|
+
|
60
|
+
def convert_to_taxsim(letter, year, save=False):
    """
    Convert Tax-Calculator output to TAXSIM-35 format by calling
    process_taxcalc_output.main on <letter><year>.in.out.csv, which
    writes <letter><year>.in.out-taxcalc.

    Args:
        letter (character): letter denoting assumption set to generate data
        year (int): year data will represent
        save (boolean): when false (the default), the intermediate
            <letter><year>.in.csv and <letter><year>.in.out.csv files
            are deleted after the conversion

    Returns:
        None
    """
    stem = letter + str(year) + ".in"
    tc_input_csv = stem + ".csv"
    tc_output_csv = stem + ".out.csv"
    ptco.main(tc_output_csv, stem + ".out-taxcalc")
    if save:
        return
    # not saving: delete intermediate input and output files
    for leftover in (tc_input_csv, tc_output_csv):
        os.remove(leftover)
|
79
|
+
|
80
|
+
|
81
|
+
def io(letter, year):
    """
    Run the three Tax-Calculator steps in order for one TAXSIM-35 input
    file: prepare the Tax-Calculator input file, call the tc CLI on it,
    and convert the tc output to TAXSIM-35 output format in a file with
    the input file name plus the .out-taxcalc extension.

    Args:
        letter (character): letter denoting assumption set to generate data
        year (int): year data will represent

    Returns:
        None
    """
    for step in (prep_tc_input, calc_tc_output, convert_to_taxsim):
        step(letter, year)
|
@@ -0,0 +1,80 @@
|
|
1
|
+
import os
|
2
|
+
import shutil
|
3
|
+
import glob
|
4
|
+
import input_setup
|
5
|
+
import main_comparison
|
6
|
+
|
7
|
+
CUR_PATH = os.path.abspath(os.path.dirname(__file__))

# define the scope of the tests
assumption_set = ["a", "b", "c"]  # datafiles to test
years = [17, 18, 19, 20, 21]  # years to test

# setup input files
input_setup.taxsim_io(assumption_set, years)

# run taxcalc/taxsim comparison, recording the result for each
# letter/year combination
tests_passed_dict = {letter: {} for letter in assumption_set}
for letter in assumption_set:
    for year in years:
        tests_passed_dict[letter][year] = main_comparison.main(letter, year)

# clean up files in the current working directory: first the output
# files whose names end in "taxcalc" or "taxsim", then the *.in files
# (glob is called once per pattern; the original combined two glob
# lists with "and", which just evaluates to one of the two lists)
for file in glob.glob("*.in*"):
    if file.endswith(("taxcalc", "taxsim")):
        os.remove(file)
for file in glob.glob("*.in"):
    os.remove(file)

# If tests passed, clean up the actual_differences directory
# keep if tests fail to help diagnose the problem
any_fail = False
for letter in assumption_set:
    for year in years:
        if tests_passed_dict[letter][year]:
            print(
                "************************************************** \n"
                + "************************************************** \n"
                + "Validation tests for "
                + letter
                + str(year)
                + " pass. "
                + "Any differences betweeen "
                + "taxcalc and TAXSIM-35 are expected due to modeling "
                + "differences. \n"
                + "************************************************** \n"
                + "**************************************************"
            )
            # remove both actual-differences files for this passing case
            for suffix in ("differences.xlsx", "-taxdiffs-actual.csv"):
                file = os.path.join(
                    CUR_PATH,
                    "actual_differences",
                    letter + str(year) + suffix,
                )
                os.remove(file)
        else:
            any_fail = True
            print(
                "************************************************** \n"
                + "************************************************** \n"
                + "At least one validation test for "
                + letter
                + str(year)
                + " failed.  Please look "
                + "at differences in the actual and expected files and "
                + "resolve the unexpected differences. \n"
                + "************************************************** \n"
                + "**************************************************"
            )
if not any_fail:  # if none fail, remove the actual_differences directory
    shutil.rmtree(os.path.join(CUR_PATH, "actual_differences"))
|