taxcalc 4.2.1__py3-none-any.whl → 4.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- taxcalc/__init__.py +1 -1
- taxcalc/assumptions/ASSUMPTIONS.md +53 -0
- taxcalc/assumptions/README.md +17 -0
- taxcalc/assumptions/economic_assumptions_template.json +77 -0
- taxcalc/calcfunctions.py +7 -4
- taxcalc/data.py +10 -5
- taxcalc/policy.py +1 -1
- taxcalc/policy_current_law.json +4649 -288
- taxcalc/records.py +20 -15
- taxcalc/reforms/2017_law.json +125 -0
- taxcalc/reforms/2017_law.out.csv +10 -0
- taxcalc/reforms/ARPA.json +78 -0
- taxcalc/reforms/ARPA.out.csv +10 -0
- taxcalc/reforms/BrownKhanna.json +23 -0
- taxcalc/reforms/BrownKhanna.out.csv +10 -0
- taxcalc/reforms/CARES.json +40 -0
- taxcalc/reforms/CARES.out.csv +10 -0
- taxcalc/reforms/ConsolidatedAppropriationsAct2021.json +15 -0
- taxcalc/reforms/ConsolidatedAppropriationsAct2021.out.csv +10 -0
- taxcalc/reforms/Larson2019.json +36 -0
- taxcalc/reforms/Larson2019.out.csv +10 -0
- taxcalc/reforms/README.md +22 -0
- taxcalc/reforms/REFORMS.md +92 -0
- taxcalc/reforms/Renacci.json +61 -0
- taxcalc/reforms/Renacci.out.csv +10 -0
- taxcalc/reforms/SandersDeFazio.json +15 -0
- taxcalc/reforms/SandersDeFazio.out.csv +10 -0
- taxcalc/reforms/TCJA.json +160 -0
- taxcalc/reforms/TCJA.md +48 -0
- taxcalc/reforms/TCJA.out.csv +10 -0
- taxcalc/reforms/Trump2016.json +71 -0
- taxcalc/reforms/Trump2016.out.csv +10 -0
- taxcalc/reforms/Trump2017.json +51 -0
- taxcalc/reforms/Trump2017.out.csv +10 -0
- taxcalc/reforms/archive/Clinton2016.json +56 -0
- taxcalc/reforms/archive/RyanBrady.json +104 -0
- taxcalc/reforms/archive/TCJA_House.json +144 -0
- taxcalc/reforms/archive/TCJA_House_Amended.json +152 -0
- taxcalc/reforms/archive/TCJA_Reconciliation.json +187 -0
- taxcalc/reforms/archive/TCJA_Senate.json +116 -0
- taxcalc/reforms/archive/TCJA_Senate_111417.json +169 -0
- taxcalc/reforms/archive/TCJA_Senate_120117.json +174 -0
- taxcalc/reforms/cases.csv +10 -0
- taxcalc/reforms/clp.out.csv +10 -0
- taxcalc/reforms/ext.json +59 -0
- taxcalc/reforms/growfactors_ext.csv +65 -0
- taxcalc/reforms/ptaxes0.json +37 -0
- taxcalc/reforms/ptaxes0.out.csv +10 -0
- taxcalc/reforms/ptaxes1.json +21 -0
- taxcalc/reforms/ptaxes1.out.csv +10 -0
- taxcalc/reforms/ptaxes2.json +18 -0
- taxcalc/reforms/ptaxes2.out.csv +10 -0
- taxcalc/reforms/ptaxes3.json +28 -0
- taxcalc/reforms/ptaxes3.out.csv +10 -0
- taxcalc/taxcalcio.py +44 -22
- taxcalc/tests/benefits_expect.csv +169 -0
- taxcalc/tests/cmpi_cps_expect.txt +132 -0
- taxcalc/tests/cmpi_puf_expect.txt +132 -0
- taxcalc/tests/conftest.py +143 -0
- taxcalc/tests/cpscsv_agg_expect.csv +26 -0
- taxcalc/tests/puf_var_correl_coeffs_2016.csv +80 -0
- taxcalc/tests/puf_var_wght_means_by_year.csv +80 -0
- taxcalc/tests/pufcsv_agg_expect.csv +26 -0
- taxcalc/tests/pufcsv_mtr_expect.txt +63 -0
- taxcalc/tests/reforms.json +649 -0
- taxcalc/tests/reforms_expect.csv +65 -0
- taxcalc/tests/test_4package.py +67 -0
- taxcalc/tests/test_benefits.py +86 -0
- taxcalc/tests/test_calcfunctions.py +871 -0
- taxcalc/tests/test_calculator.py +1021 -0
- taxcalc/tests/test_compare.py +336 -0
- taxcalc/tests/test_compatible_data.py +338 -0
- taxcalc/tests/test_consumption.py +144 -0
- taxcalc/tests/test_cpscsv.py +163 -0
- taxcalc/tests/test_data.py +133 -0
- taxcalc/tests/test_decorators.py +332 -0
- taxcalc/tests/test_growdiff.py +102 -0
- taxcalc/tests/test_growfactors.py +94 -0
- taxcalc/tests/test_parameters.py +617 -0
- taxcalc/tests/test_policy.py +1557 -0
- taxcalc/tests/test_puf_var_stats.py +194 -0
- taxcalc/tests/test_pufcsv.py +385 -0
- taxcalc/tests/test_records.py +234 -0
- taxcalc/tests/test_reforms.py +386 -0
- taxcalc/tests/test_responses.py +41 -0
- taxcalc/tests/test_taxcalcio.py +755 -0
- taxcalc/tests/test_utils.py +792 -0
- taxcalc/validation/CSV_INPUT_VARS.md +29 -0
- taxcalc/validation/CSV_OUTPUT_VARS.md +63 -0
- taxcalc/validation/README.md +68 -0
- taxcalc/validation/taxsim35/Differences_Explained.md +54 -0
- taxcalc/validation/taxsim35/README.md +139 -0
- taxcalc/validation/taxsim35/expected_differences/a17-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/a18-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/a19-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/a20-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/a21-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/b17-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/b18-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/b19-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/b20-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/b21-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/c17-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/c18-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/expected_differences/c19-taxdiffs-expect.csv +25 -0
- taxcalc/validation/taxsim35/input_setup.py +67 -0
- taxcalc/validation/taxsim35/main_comparison.py +183 -0
- taxcalc/validation/taxsim35/prepare_taxcalc_input.py +161 -0
- taxcalc/validation/taxsim35/process_taxcalc_output.py +140 -0
- taxcalc/validation/taxsim35/taxsim_emulation.json +49 -0
- taxcalc/validation/taxsim35/taxsim_input.py +321 -0
- taxcalc/validation/taxsim35/tc_sims.py +98 -0
- taxcalc/validation/taxsim35/tests_35.py +80 -0
- taxcalc/validation/tests_35.sh +13 -0
- {taxcalc-4.2.1.dist-info → taxcalc-4.3.0.dist-info}/METADATA +3 -4
- taxcalc-4.3.0.dist-info/RECORD +139 -0
- {taxcalc-4.2.1.dist-info → taxcalc-4.3.0.dist-info}/WHEEL +1 -1
- taxcalc/tmd_growfactors.csv +0 -55
- taxcalc/tmd_weights.csv.gz +0 -0
- taxcalc-4.2.1.dist-info/RECORD +0 -34
- {taxcalc-4.2.1.dist-info → taxcalc-4.3.0.dist-info}/LICENSE +0 -0
- {taxcalc-4.2.1.dist-info → taxcalc-4.3.0.dist-info}/entry_points.txt +0 -0
- {taxcalc-4.2.1.dist-info → taxcalc-4.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,792 @@
|
|
1
|
+
"""
|
2
|
+
Tests of Tax-Calculator utility functions.
|
3
|
+
"""
|
4
|
+
# CODING-STYLE CHECKS:
|
5
|
+
# pycodestyle test_utils.py
|
6
|
+
# pylint --disable=locally-disabled test_utils.py
|
7
|
+
#
|
8
|
+
# pylint: disable=missing-docstring
|
9
|
+
|
10
|
+
import os
|
11
|
+
import math
|
12
|
+
import random
|
13
|
+
import numpy as np
|
14
|
+
import pandas as pd
|
15
|
+
import pytest
|
16
|
+
# pylint: disable=import-error
|
17
|
+
from taxcalc import Policy, Records, Calculator
|
18
|
+
from taxcalc.utils import (DIST_VARIABLES,
|
19
|
+
DIST_TABLE_COLUMNS, DIST_TABLE_LABELS,
|
20
|
+
DIFF_VARIABLES,
|
21
|
+
DIFF_TABLE_COLUMNS, DIFF_TABLE_LABELS,
|
22
|
+
SOI_AGI_BINS,
|
23
|
+
create_difference_table,
|
24
|
+
weighted_sum, weighted_mean,
|
25
|
+
wage_weighted, agi_weighted,
|
26
|
+
expanded_income_weighted,
|
27
|
+
add_income_table_row_variable,
|
28
|
+
add_quantile_table_row_variable,
|
29
|
+
mtr_graph_data, atr_graph_data,
|
30
|
+
xtr_graph_plot, write_graph_file,
|
31
|
+
read_egg_csv, read_egg_json, delete_file,
|
32
|
+
bootstrap_se_ci,
|
33
|
+
certainty_equivalent,
|
34
|
+
ce_aftertax_expanded_income)
|
35
|
+
|
36
|
+
|
37
|
+
# Rows are (tax_diff, s006 weight, group label) filing-unit records
# used by the weighted_mean/weighted_sum tests below.
# Group 'a' weighted sum = 1*2 - 1*4 + 3*6 = 16 over weight 12;
# group 'b' weighted sum = 2*4 + 3*6 = 26 over weight 10.
DATA = [[1.0, 2, 'a'],
        [-1.0, 4, 'a'],
        [3.0, 6, 'a'],
        [2.0, 4, 'b'],
        [3.0, 6, 'b']]

# Rows are (var, s006 weight, income-measure value) records used by the
# wage_weighted/agi_weighted/expanded_income_weighted tests; the third
# column is renamed to e00200, c00100, or expanded_income per test.
WEIGHT_DATA = [[1.0, 2.0, 10.0],
               [2.0, 4.0, 20.0],
               [3.0, 6.0, 30.0]]

# Same layout as DATA but with additional near-zero tax_diff rows,
# for tests that exercise floating-point edge values.
DATA_FLOAT = [[1.0, 2, 'a'],
              [-1.0, 4, 'a'],
              [0.0000000001, 3, 'a'],
              [-0.0000000001, 1, 'a'],
              [3.0, 6, 'a'],
              [2.0, 4, 'b'],
              [0.0000000001, 3, 'b'],
              [-0.0000000001, 1, 'b'],
              [3.0, 6, 'b']]
|
56
|
+
|
57
|
+
|
58
|
+
def test_validity_of_name_lists():
    """Check that the table column/variable name lists are consistent."""
    assert len(DIST_TABLE_COLUMNS) == len(DIST_TABLE_LABELS)
    varinfo = Records(data=None)
    allowed = varinfo.CALCULATED_VARS | {'s006', 'XTOT'}
    assert set(DIST_VARIABLES) <= allowed
    expected_extras = {'count',
                       'count_StandardDed',
                       'count_ItemDed',
                       'count_AMT'}
    assert set(DIST_TABLE_COLUMNS) - set(DIST_VARIABLES) == expected_extras
|
68
|
+
|
69
|
+
|
70
|
+
def test_create_tables(cps_subsample):
    """
    Compare difference and distribution tables for a small II_rt1 reform
    against hard-coded expected values; on mismatch, print the actual
    column values (to ease updating the expectations) and fail at the end.
    """
    # create a current-law Policy object and Calculator object calc1
    rec = Records.cps_constructor(data=cps_subsample)
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # create a policy-reform Policy object and Calculator object calc2
    reform = {'II_rt1': {2013: 0.15}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()

    failures = []

    def check(table, tabcol, expected, label, equal_nan=False):
        # Record (and print) a failure when table[tabcol] differs from
        # expected by more than 0.1 in absolute value.
        actual = table[tabcol].values.astype('float')
        if not np.allclose(actual, expected,
                           atol=0.1, rtol=0.0, equal_nan=equal_nan):
            failures.append('{} {}'.format(label, tabcol))
            print(label, tabcol)
            for val in actual:
                print('{:.1f},'.format(val))

    # test creating various difference tables

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'standard_income_bins', 'combined')
    assert isinstance(diff, pd.DataFrame)
    check(diff, 'pc_aftertaxinc',
          [0.0, np.nan, -0.1, -0.5, -0.7, -0.7, -0.8,
           -0.7, -0.7, -0.7, -0.3, -0.1, -0.0, -0.6],
          'diff xbin', equal_nan=True)

    diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
                                   calc2.dataframe(DIFF_VARIABLES),
                                   'weighted_deciles', 'combined')
    assert isinstance(diff, pd.DataFrame)
    check(diff, 'tot_change',
          [0.0, 0.0, 0.0, 0.6, 2.9, 3.5, 4.4, 6.1,
           6.5, 8.7, 12.0, 13.3, 58.0, 7.7, 4.8, 0.8],
          'diff xdec')
    check(diff, 'share_of_change',
          [0.0, 0.0, 0.0, 1.0, 5.0, 6.0, 7.6, 10.6,
           11.1, 15.1, 20.7, 22.9, 100.0, 13.2, 8.3, 1.4],
          'diff xdec')
    check(diff, 'pc_aftertaxinc',
          [np.nan, 0.0, -0.0, -0.3, -0.8, -0.7, -0.7, -0.8,
           -0.7, -0.7, -0.7, -0.3, -0.6, -0.7, -0.4, -0.1],
          'diff xdec', equal_nan=True)

    # test creating various distribution tables

    dist, _ = calc2.distribution_tables(None, 'weighted_deciles')
    assert isinstance(dist, pd.DataFrame)
    check(dist, 'iitax',
          [0.0, 0.0, -0.4, -4.1, -5.9, 8.0, 16.9, 29.0,
           27.0, 71.4, 153.4, 910.1, 1205.5, 159.4, 268.1, 482.7],
          'dist xdec')
    check(dist, 'count_ItemDed',
          [0.0, 0.0, 0.0, 1.1, 2.6, 3.9, 4.7, 6.3,
           6.5, 7.4, 11.3, 16.3, 60.3, 7.4, 7.2, 1.7],
          'dist xdec')
    check(dist, 'expanded_income',
          [0.0, -1.4, 30.7, 209.8, 388.8, 541.2, 679.1, 847.6,
           1097.1, 1430.7, 1978.3, 5007.6, 12209.4, 1410.9, 1765.5, 1831.2],
          'dist xdec')
    check(dist, 'aftertax_income',
          [0.0, -1.4, 29.0, 195.5, 363.0, 491.0, 612.2, 747.1,
           980.6, 1248.0, 1630.2, 3741.3, 10036.6, 1100.9, 1339.0, 1301.4],
          'dist xdec')

    dist, _ = calc2.distribution_tables(None, 'standard_income_bins')
    assert isinstance(dist, pd.DataFrame)
    check(dist, 'iitax',
          [0.0, 0.0, -1.3, -7.6, -1.2, 20.7, 26.3,
           47.2, 95.5, 321.9, 324.0, 64.8, 315.2, 1205.5],
          'dist xbin')
    check(dist, 'count_ItemDed',
          [0.0, 0.0, 0.2, 1.8, 3.6, 5.9, 5.7,
           10.2, 8.1, 17.7, 6.7, 0.3, 0.1, 60.3],
          'dist xbin')

    # BUG FIX: previously ended with the opaque `assert 1 == 2`;
    # fail with the list of mismatched columns instead.
    assert not failures, failures
|
337
|
+
|
338
|
+
|
339
|
+
def test_diff_count_precision():
    """
    Estimate bootstrap standard error and confidence interval for count
    statistics ('tax_cut' and 'tax_inc') in difference table generated
    using puf.csv input data taking no account of tbi privacy fuzzing and
    assuming all filing units in each bin have the same weight. These
    assumptions imply that the estimates produced here are likely to
    over-estimate the precision of the count statistics.

    Background information on unweighted number of filing units by bin:

    DECILE BINS:
    0   16268
    1   14897
    2   13620
    3   15760
    4   16426
    5   18070
    6   18348
    7   19352
    8   21051
    9   61733 <--- largest unweighted bin count
    A  215525

    STANDARD BINS:
    0    7081 <--- negative income bin was dropped in TaxBrain display
    1   19355
    2   22722
    3   20098
    4   17088
    5   14515
    6   24760
    7   15875
    8   25225
    9   15123
    10  10570 <--- smallest unweighted bin count
    11  23113 <--- second largest unweighted WEBAPP bin count
    A  215525

    Background information on Trump2017.json reform used in TaxBrain run 16649:

    STANDARD bin 10 ($500-1000 thousand) has weighted count of 1179 thousand;
    weighted count of units with tax increase is 32 thousand.

    So, the mean weight for all units in STANDARD bin 10 is 111.5421 and the
    unweighted number with a tax increase is 287 assuming all units in that
    bin have the same weight. (Note that 287 * 111.5421 is about 32,012.58,
    which rounds to the 32 thousand shown in the TaxBrain difference table.)

    STANDARD bin 11 ($1000+ thousand) has weighted count of 636 thousand;
    weighted count of units with tax increase is 27 thousand.

    So, the mean weight for all units in STANDARD bin 11 is about 27.517 and
    the unweighted number with a tax increase is 981 assuming all units in
    that bin have the same weight. (Note that 981 * 27.517 is about 26,994.18,
    which rounds to the 27 thousand shown in the TaxBrain difference table.)
    """
    dump = False  # setting to True implies results printed and test fails
    seed = 123456789
    bs_samples = 1000
    alpha = 0.025  # implies 95% confidence interval

    def bin_increase_stats(weight, num_increase, num_units, label):
        # Build an artificial bin in which num_increase units (each with
        # the same weight) have a tax increase, bootstrap the weighted
        # count, and return (estimate, stderr, cilo, cihi) in thousands.
        data = np.array([weight] * num_increase +
                        [0.0] * (num_units - num_increase))
        assert len(data) == num_units
        assert (data > 0).sum() == num_increase
        estimate = np.sum(data) * 1e-3
        bsd = bootstrap_se_ci(data, seed, bs_samples, np.sum, alpha)
        stderr = bsd['se'] * 1e-3
        cilo = bsd['cilo'] * 1e-3
        cihi = bsd['cihi'] * 1e-3
        if dump:
            res = ('{}EST={:.1f} B={} alpha={:.3f} '
                   'se={:.2f} ci=[ {:.2f} , {:.2f} ]')
            print(
                res.format(label,
                           estimate, bs_samples, alpha, stderr, cilo, cihi)
            )
        return estimate, stderr, cilo, cihi

    # compute stderr and confidence interval for STANDARD bin 10 increase count
    est, stderr, cilo, cihi = bin_increase_stats(111.5421, 287, 10570,
                                                 'STANDARD-BIN10: ')
    assert abs((est / 32) - 1) < 0.0005
    assert abs((stderr / 1.90) - 1) < 0.0008
    # NOTE: a se of 1.90 thousand implies that when comparing the difference
    #       in the weighted number of filing units in STANDARD bin 10 with a
    #       tax increase, the difference statistic has a bigger se (because
    #       the variance of the difference is the sum of the variances of the
    #       two point estimates).  If both point estimates had se = 1.90,
    #       the difference would have se = 2.687, so the difference would
    #       have to be over 5 thousand to be confidently different from zero.
    assert abs((cilo / 28.33) - 1) < 0.0012
    assert abs((cihi / 35.81) - 1) < 0.0012
    # compute stderr and confidence interval for STANDARD bin 11 increase count
    est, stderr, cilo, cihi = bin_increase_stats(27.517, 981, 23113,
                                                 'STANDARD-BIN11: ')
    assert abs((est / 27) - 1) < 0.0005
    assert abs((stderr / 0.85) - 1) < 0.0040
    # NOTE: by the same variance-addition argument as for bin 10, two point
    #       estimates each with se = 0.85 imply a difference se of 1.20, so
    #       a difference would have to be over 2.5 thousand to be
    #       confidently different from zero.
    assert abs((cilo / 25.37) - 1) < 0.0012
    assert abs((cihi / 28.65) - 1) < 0.0012
    # fail if doing dump
    assert not dump
|
468
|
+
|
469
|
+
|
470
|
+
def test_weighted_mean():
    """weighted_mean must reproduce hand-computed per-group means."""
    frame = pd.DataFrame(DATA, columns=['tax_diff', 's006', 'label'])
    result = frame.groupby('label').apply(
        weighted_mean, 'tax_diff', include_groups=False)
    expected = pd.Series([16.0 / 12.0, 26.0 / 10.0], index=['a', 'b'])
    expected.index.name = 'label'
    pd.testing.assert_series_equal(expected, result)
|
477
|
+
|
478
|
+
|
479
|
+
def test_wage_weighted():
    """wage_weighted must weight 'var' by s006 * e00200."""
    frame = pd.DataFrame(WEIGHT_DATA, columns=['var', 's006', 'e00200'])
    assert round(wage_weighted(frame, 'var'), 4) == 2.5714
|
483
|
+
|
484
|
+
|
485
|
+
def test_agi_weighted():
    """agi_weighted must weight 'var' by s006 * c00100."""
    frame = pd.DataFrame(WEIGHT_DATA, columns=['var', 's006', 'c00100'])
    assert round(agi_weighted(frame, 'var'), 4) == 2.5714
|
489
|
+
|
490
|
+
|
491
|
+
def test_expanded_income_weighted():
    """expanded_income_weighted must weight 'var' by s006 * expanded_income."""
    frame = pd.DataFrame(
        WEIGHT_DATA, columns=['var', 's006', 'expanded_income'])
    assert round(expanded_income_weighted(frame, 'var'), 4) == 2.5714
|
496
|
+
|
497
|
+
|
498
|
+
def test_weighted_sum():
    """weighted_sum must reproduce hand-computed per-group sums."""
    frame = pd.DataFrame(DATA, columns=['tax_diff', 's006', 'label'])
    result = frame.groupby('label').apply(
        weighted_sum, 'tax_diff', include_groups=False)
    expected = pd.Series([16.0, 26.0], index=['a', 'b'])
    expected.index.name = 'label'
    pd.testing.assert_series_equal(expected, result)
|
505
|
+
|
506
|
+
|
507
|
+
EPSILON = 1e-5  # tolerance used when comparing table-row bin edges below
|
508
|
+
|
509
|
+
|
510
|
+
def test_add_income_trow_var():
    """Table-row intervals must be left-closed with SOI AGI bin edges."""
    incomes = np.arange(1, 1e6, 5000)
    vdf = pd.DataFrame(data=incomes, columns=['expanded_income'])
    vdf = add_income_table_row_variable(vdf, 'expanded_income', SOI_AGI_BINS)
    groups = vdf.groupby('table_row', observed=False)
    for pos, (interval, _) in enumerate(groups, start=1):
        assert interval.closed == 'left'
        assert abs(interval.right - SOI_AGI_BINS[pos]) < EPSILON
|
520
|
+
|
521
|
+
|
522
|
+
def test_add_quantile_trow_var():
    """Quantile table rows must be labeled 1..100; decile_details with
    100 quantiles must raise ValueError."""
    frame = pd.DataFrame(data=DATA,
                         columns=['expanded_income', 's006', 'label'])
    binned = add_quantile_table_row_variable(
        frame, 'expanded_income', 100,
        decile_details=False, weight_by_income_measure=False)
    valid_labels = set(range(1, 101))
    assert all(lab in valid_labels for lab in binned['table_row'].unique())
    binned = add_quantile_table_row_variable(frame, 'expanded_income',
                                             100, decile_details=False)
    assert 'table_row' in binned
    with pytest.raises(ValueError):
        add_quantile_table_row_variable(frame, 'expanded_income',
                                        100, decile_details=True)
|
537
|
+
|
538
|
+
|
539
|
+
def test_dist_table_sum_row(cps_subsample):
    """The ALL row of distribution tables must agree across bin types."""
    rec = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=Policy(), records=rec)
    calc.calc_all()

    def all_row(table):
        # Return the ALL row of a table as a float array.
        return table.loc['ALL'].values.astype('float')

    # create distribution tables and compare their ALL row contents
    tb1, _ = calc.distribution_tables(None, 'standard_income_bins')
    tb2, _ = calc.distribution_tables(None, 'soi_agi_bins')
    tb3, _ = calc.distribution_tables(None, 'weighted_deciles')
    tb4, _ = calc.distribution_tables(None, 'weighted_deciles',
                                      pop_quantiles=True)
    assert np.allclose(all_row(tb1), all_row(tb2))
    assert np.allclose(all_row(tb1), all_row(tb3))
    # population count must be larger than filing-unit count
    assert tb4.at['ALL', 'count'] > tb1.at['ALL', 'count']
    # after copying the count columns, the population table's ALL row must
    # match the filing-unit table's ALL row
    for col in ('count', 'count_StandardDed', 'count_ItemDed', 'count_AMT'):
        tb4.at['ALL', col] = tb1.at['ALL', col]
    assert np.allclose(all_row(tb1), all_row(tb4))
    # ALL tax liabilities must match those in the diagnostic table
    dgt = calc.diagnostic_table(1)
    assert np.allclose(
        [tb4.at['ALL', 'iitax'], tb4.at['ALL', 'payrolltax']],
        [dgt.at['Ind Income Tax ($b)', calc.current_year],
         dgt.at['Payroll Taxes ($b)', calc.current_year]])
|
566
|
+
|
567
|
+
|
568
|
+
def test_diff_table_sum_row(cps_subsample):
    """The ALL row of difference tables must agree across bin types."""
    rec = Records.cps_constructor(data=cps_subsample)
    # baseline calculator under current law
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.calc_all()
    # reform calculator with a higher fourth-bracket rate
    pol.implement_reform({'II_rt4': {2013: 0.56}})
    calc2 = Calculator(policy=pol, records=rec)
    calc2.calc_all()
    # create difference tables and compare their ALL row contents
    base = calc1.dataframe(DIFF_VARIABLES)
    resp = calc2.dataframe(DIFF_VARIABLES)
    dt1 = create_difference_table(
        base, resp, 'standard_income_bins', 'iitax')
    dt2 = create_difference_table(base, resp, 'soi_agi_bins', 'iitax')
    dt3 = create_difference_table(
        base, resp, 'weighted_deciles', 'iitax', pop_quantiles=False)
    dt4 = create_difference_table(
        base, resp, 'weighted_deciles', 'iitax', pop_quantiles=True)

    def all_row(table):
        # Return the ALL row of a table as a float array.
        return table.loc['ALL'].values.astype('float')

    assert np.allclose(all_row(dt1), all_row(dt2))
    assert np.allclose(all_row(dt1), all_row(dt3))
    # population count must be larger than filing-unit count
    assert dt4.at['ALL', 'count'] > dt1.at['ALL', 'count']
|
595
|
+
|
596
|
+
|
597
|
+
def test_mtr_graph_data(cps_subsample):
    """mtr_graph_data must reject bad arguments and return a dict."""
    recs = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=Policy(), records=recs)
    year = calc.current_year
    # each of these keyword combinations must raise ValueError
    bad_calls = [
        dict(mars='bad', income_measure='agi', dollar_weighting=True),
        dict(mars=0, income_measure='expanded_income', dollar_weighting=True),
        dict(mars=list()),
        dict(mars='ALL', mtr_variable='e00200s'),
        dict(mtr_measure='badtax'),
        dict(income_measure='badincome'),
    ]
    for kwargs in bad_calls:
        with pytest.raises(ValueError):
            mtr_graph_data(None, year, **kwargs)
    # a valid call on single filers must return a dict
    rates = 0.20 * np.ones_like(cps_subsample['e00200'])
    frame = calc.dataframe(['s006', 'MARS', 'e00200'])
    frame['mtr1'] = rates
    frame['mtr2'] = rates
    frame = frame[frame['MARS'] == 1]
    gdata = mtr_graph_data(frame, year, mars=1,
                           mtr_wrt_full_compen=True,
                           income_measure='wages',
                           dollar_weighting=True)
    assert isinstance(gdata, dict)
|
627
|
+
|
628
|
+
|
629
|
+
def test_atr_graph_data(cps_subsample):
    """atr_graph_data must reject bad arguments and return a dict for
    each valid atr_measure."""
    pol = Policy()
    rec = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=pol, records=rec)
    year = calc.current_year
    # each of these mars values must raise ValueError
    for bad_mars in ('bad', 0, list()):
        with pytest.raises(ValueError):
            atr_graph_data(None, year, mars=bad_mars)
    with pytest.raises(ValueError):
        atr_graph_data(None, year, atr_measure='badtax')
    calc.calc_all()
    vdf = calc.dataframe(['s006', 'MARS', 'expanded_income'])
    tax = 0.20 * np.ones_like(vdf['expanded_income'])
    vdf['tax1'] = tax
    vdf['tax2'] = tax
    # BUG FIX: previously only the last result was asserted; check each one
    gdata = atr_graph_data(vdf, year, mars=1, atr_measure='combined')
    assert isinstance(gdata, dict)
    gdata = atr_graph_data(vdf, year, atr_measure='itax')
    assert isinstance(gdata, dict)
    gdata = atr_graph_data(vdf, year, atr_measure='ptax')
    assert isinstance(gdata, dict)
|
651
|
+
|
652
|
+
|
653
|
+
def test_xtr_graph_plot(cps_subsample):
    """xtr_graph_plot must return a truthy plot object."""
    recs = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=Policy(), records=recs)
    rates = 0.20 * np.ones_like(cps_subsample['e00200'])
    frame = calc.dataframe(['s006', 'MARS', 'c00100'])
    frame['mtr1'] = rates
    frame['mtr2'] = rates
    gdata = mtr_graph_data(frame, calc.current_year, mtr_measure='ptax',
                           income_measure='agi',
                           dollar_weighting=False)
    assert xtr_graph_plot(gdata)
    frame = calc.dataframe(['s006', 'expanded_income'])
    frame['mtr1'] = rates
    frame['mtr2'] = rates
    gdata = mtr_graph_data(frame, calc.current_year, mtr_measure='itax',
                           alt_e00200p_text='Taxpayer Earnings',
                           income_measure='expanded_income',
                           dollar_weighting=False)
    assert isinstance(gdata, dict)
|
673
|
+
|
674
|
+
|
675
|
+
def temporary_filename(suffix=''):
    """Return a pseudo-random temporary file name ending with *suffix*."""
    return f'tmp{random.randint(10000000, 99999999)}{suffix}'
|
678
|
+
|
679
|
+
|
680
|
+
def test_write_graph_file(cps_subsample):
    """Check that write_graph_file can write an xtr graph to an HTML file."""
    recs = Records.cps_constructor(data=cps_subsample)
    calc = Calculator(policy=Policy(), records=recs)

    def silently_remove(path):
        # best-effort removal: sometimes a generated temp file can't be removed
        if os.path.isfile(path):
            try:
                os.remove(path)
            except OSError:
                pass

    fake_mtr = 0.20 * np.ones_like(cps_subsample['e00200'])
    vdf = calc.dataframe(['s006', 'e00200', 'c00100'])
    vdf['mtr1'] = fake_mtr
    vdf['mtr2'] = fake_mtr
    gdata = mtr_graph_data(vdf, calc.current_year, mtr_measure='ptax',
                           alt_e00200p_text='Taxpayer Earnings',
                           income_measure='agi',
                           dollar_weighting=False)
    gplot = xtr_graph_plot(gdata)
    assert gplot
    htmlfname = temporary_filename(suffix='.html')
    try:
        write_graph_file(gplot, htmlfname, 'title')
    except Exception:  # pylint: disable=broad-except
        silently_remove(htmlfname)
        assert 'write_graph_file()_ok' == 'no'
    # if try was successful, try to remove the file
    silently_remove(htmlfname)
|
709
|
+
|
710
|
+
|
711
|
+
def test_ce_aftertax_income(cps_subsample):
    """
    Test certainty_equivalent() and ce_aftertax_expanded_income() functions.
    """
    # test certainty_equivalent() function with con>cmin
    con = 5000
    cmin = 1000
    assert con == round(certainty_equivalent(con, 0, cmin), 6)
    assert con > round(certainty_equivalent((math.log(con) - 0.1), 1, cmin), 6)
    # test certainty_equivalent() function with con<cmin
    con = 500
    cmin = 1000
    assert con == round(certainty_equivalent(con, 0, cmin), 6)
    # test with require_no_agg_tax_change equal to False
    rec = Records.cps_constructor(data=cps_subsample)
    cyr = 2020
    # specify calc1 and calc_all() for cyr
    pol = Policy()
    calc1 = Calculator(policy=pol, records=rec)
    calc1.advance_to_year(cyr)
    calc1.calc_all()
    # specify calc2 and calc_all() for cyr
    reform = {'II_em': {2019: 1000}}
    pol.implement_reform(reform)
    calc2 = Calculator(policy=pol, records=rec)
    calc2.advance_to_year(cyr)
    calc2.calc_all()
    df1 = calc1.dataframe(['s006', 'combined', 'expanded_income'])
    df2 = calc2.dataframe(['s006', 'combined', 'expanded_income'])
    cedict = ce_aftertax_expanded_income(df1, df2,
                                         require_no_agg_tax_change=False)
    assert isinstance(cedict, dict)
    # BUG FIX: np.allclose return values were previously discarded, so the
    # expected-value comparisons below never actually tested anything;
    # assert them so a mismatch fails the test.
    assert np.allclose(cedict['ceeu1'], [55641, 27167, 5726, 2229, 1565],
                       atol=0.5, rtol=0.0)
    assert np.allclose(cedict['ceeu2'], [54629, 26698, 5710, 2229, 1565],
                       atol=0.5, rtol=0.0)
    # test with require_no_agg_tax_change equal to True
    with pytest.raises(ValueError):
        ce_aftertax_expanded_income(df1, df2, require_no_agg_tax_change=True)
    # test with require_no_agg_tax_change equal to False and custom_params
    params = {'crra_list': [0, 2], 'cmin_value': 2000}
    with pytest.raises(ValueError):
        ce_aftertax_expanded_income(df1, df2, require_no_agg_tax_change=True,
                                    custom_params=params)
|
752
|
+
|
753
|
+
|
754
|
+
def test_read_egg_csv():
    # read_egg_csv must raise ValueError when the named file does not exist
    with pytest.raises(ValueError):
        read_egg_csv('bad_filename')
|
757
|
+
|
758
|
+
|
759
|
+
def test_read_egg_json():
    # read_egg_json must raise ValueError when the named file does not exist
    with pytest.raises(ValueError):
        read_egg_json('bad_filename')
|
762
|
+
|
763
|
+
|
764
|
+
def test_create_delete_temp_file():
    """Test temporary_filename() and delete_file() utility functions."""
    fname = temporary_filename()
    # FIX: specify encoding explicitly (PEP 597) instead of relying on the
    # platform-dependent default encoding of open()
    with open(fname, 'w', encoding='utf-8') as tmpfile:
        tmpfile.write('any content will do')
    assert os.path.isfile(fname) is True
    delete_file(fname)
    assert os.path.isfile(fname) is False
|
772
|
+
|
773
|
+
|
774
|
+
def test_bootstrap_se_ci():
    """
    Check bootstrap_se_ci() against the treated mouse data from Table 2.1
    and results from Table 2.2 and Table 13.1 in Bradley Efron and
    Robert Tibshirani, "An Introduction to the Bootstrap"
    (Chapman & Hall, 1993).
    """
    mouse_data = np.array([94, 197, 16, 38, 99, 141, 23], dtype=np.float64)
    # sample-mean check involves only rounding error
    assert abs(np.mean(mouse_data) - 86.86) < 0.005
    bsd = bootstrap_se_ci(mouse_data, 123456789, 1000, np.mean, alpha=0.025)
    # book-value comparisons are less precise because of
    # random-number stream differences
    expected = (('se', 23.02, 0.02),
                ('cilo', 45.9, 0.02),
                ('cihi', 135.4, 0.03))
    for key, bookval, reltol in expected:
        assert abs(bsd[key] / bookval - 1) < reltol
|
787
|
+
|
788
|
+
|
789
|
+
def test_table_columns_labels():
    """Check each table-column list is the same length as its label list."""
    paired_lists = (
        (DIST_TABLE_COLUMNS, DIST_TABLE_LABELS),
        (DIFF_TABLE_COLUMNS, DIFF_TABLE_LABELS),
    )
    for columns, labels in paired_lists:
        assert len(columns) == len(labels)
|