taxcalc 4.5.0__py3-none-any.whl → 4.6.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
taxcalc/taxcalcio.py CHANGED
@@ -54,9 +54,8 @@ class TaxCalcIO():
54
54
  None implies economic assumptions are standard assumptions,
55
55
  or string is name of optional ASSUMP file.
56
56
 
57
- outdir: None or string
58
- None implies output files written to current directory,
59
- or string is name of optional output directory
57
+ silent: boolean
58
+ whether or not to suppress action messages.
60
59
 
61
60
  Returns
62
61
  -------
@@ -65,9 +64,10 @@ class TaxCalcIO():
65
64
  # pylint: disable=too-many-instance-attributes
66
65
 
67
66
  def __init__(self, input_data, tax_year, baseline, reform, assump,
68
- outdir=None):
67
+ silent=True):
69
68
  # pylint: disable=too-many-arguments,too-many-positional-arguments
70
69
  # pylint: disable=too-many-branches,too-many-statements,too-many-locals
70
+ self.silent = silent
71
71
  self.gf_reform = None
72
72
  self.errmsg = ''
73
73
  # check name and existence of INPUT file
@@ -115,23 +115,37 @@ class TaxCalcIO():
115
115
  else:
116
116
  msg = 'INPUT is neither string nor Pandas DataFrame'
117
117
  self.errmsg += f'ERROR: {msg}\n'
118
- # check name and existence of BASELINE file
118
+ # check name(s) and existence of BASELINE file(s)
119
119
  bas = '-x'
120
120
  if baseline is None:
121
+ self.specified_baseline = False
121
122
  bas = '-#'
122
123
  elif isinstance(baseline, str):
123
- # remove any leading directory path from BASELINE filename
124
- fname = os.path.basename(baseline)
125
- # check if fname ends with ".json"
126
- if fname.endswith('.json'):
127
- bas = f'-{fname[:-5]}'
128
- else:
129
- msg = 'BASELINE file name does not end in .json'
130
- self.errmsg += f'ERROR: {msg}\n'
131
- # check existence of BASELINE file
132
- if not os.path.isfile(baseline):
133
- msg = 'BASELINE file could not be found'
134
- self.errmsg += f'ERROR: {msg}\n'
124
+ self.specified_baseline = True
125
+ # split any compound baseline into list of simple reforms
126
+ basnames = []
127
+ baselines = baseline.split('+')
128
+ for bas in baselines:
129
+ # remove any leading directory path from bas filename
130
+ fname = os.path.basename(bas)
131
+ # check if fname ends with ".json"
132
+ if not fname.endswith('.json'):
133
+ msg = f'{fname} does not end in .json'
134
+ self.errmsg += f'ERROR: BASELINE file name {msg}\n'
135
+ # check existence of BASELINE file
136
+ if not os.path.isfile(bas):
137
+ msg = f'{bas} could not be found'
138
+ self.errmsg += f'ERROR: BASELINE file {msg}\n'
139
+ # add fname to list of basnames used in output file names
140
+ basnames.append(fname)
141
+ # create (possibly compound) baseline name for output file names
142
+ bas = '-'
143
+ num_basnames = 0
144
+ for basname in basnames:
145
+ num_basnames += 1
146
+ if num_basnames > 1:
147
+ bas += '+'
148
+ bas += f'{basname[:-5]}'
135
149
  else:
136
150
  msg = 'TaxCalcIO.ctor: baseline is neither None nor str'
137
151
  self.errmsg += f'ERROR: {msg}\n'
@@ -189,44 +203,28 @@ class TaxCalcIO():
189
203
  else:
190
204
  msg = 'TaxCalcIO.ctor: assump is neither None nor str'
191
205
  self.errmsg += f'ERROR: {msg}\n'
192
- # check name and existence of OUTDIR
193
- if outdir is None:
194
- valid_outdir = True
195
- elif isinstance(outdir, str):
196
- # check existence of OUTDIR
197
- if os.path.isdir(outdir):
198
- valid_outdir = True
199
- else:
200
- valid_outdir = False
201
- msg = 'OUTDIR could not be found'
202
- self.errmsg += f'ERROR: {msg}\n'
203
- else:
204
- valid_outdir = False
205
- msg = 'TaxCalcIO.ctor: outdir is neither None nor str'
206
- self.errmsg += f'ERROR: {msg}\n'
207
206
  # create OUTPUT file name and delete any existing output files
208
- output_filename = f'{inp}{bas}{ref}{asm}.csv'
209
- if outdir is None:
210
- self._output_filename = output_filename
211
- delete_old_files = True
212
- elif valid_outdir:
213
- self._output_filename = os.path.join(outdir, output_filename)
214
- delete_old_files = True
215
- else:
216
- delete_old_files = False
217
- if delete_old_files:
218
- delete_file(self._output_filename)
219
- delete_file(self._output_filename.replace('.csv', '.db'))
220
- delete_file(self._output_filename.replace('.csv', '-doc.text'))
221
- delete_file(self._output_filename.replace('.csv', '-tab.text'))
222
- delete_file(self._output_filename.replace('.csv', '-atr.html'))
223
- delete_file(self._output_filename.replace('.csv', '-mtr.html'))
224
- delete_file(self._output_filename.replace('.csv', '-pch.html'))
207
+ self.output_filename = f'{inp}{bas}{ref}{asm}.xxx'
208
+ self.delete_output_files()
225
209
  # initialize variables whose values are set in init method
226
- self.calc = None
227
- self.calc_base = None
228
- self.param_dict = None
229
- self.policy_dicts = []
210
+ self.calc_ref = None
211
+ self.calc_bas = None
212
+
213
+ def delete_output_files(self):
214
+ """
215
+ Delete all output files derived from self.output_filename.
216
+ """
217
+ extensions = [
218
+ '-params.bas',
219
+ '-params.ref',
220
+ '-tables.text',
221
+ '-atr.html',
222
+ '-mtr.html',
223
+ '-pch.html',
224
+ '.db',
225
+ ]
226
+ for ext in extensions:
227
+ delete_file(self.output_filename.replace('.xxx', ext))
230
228
 
231
229
  def init(self, input_data, tax_year, baseline, reform, assump,
232
230
  aging_input_data, exact_calculations):
@@ -251,13 +249,13 @@ class TaxCalcIO():
251
249
  self.errmsg = ''
252
250
  # instantiate base and reform GrowFactors objects
253
251
  if self.tmd_input_data:
254
- gfactors_base = GrowFactors(self.tmd_gfactor) # pragma: no cover
252
+ gfactors_bas = GrowFactors(self.tmd_gfactor) # pragma: no cover
255
253
  gfactors_ref = GrowFactors(self.tmd_gfactor) # pragma: no cover
256
254
  else:
257
- gfactors_base = GrowFactors()
255
+ gfactors_bas = GrowFactors()
258
256
  gfactors_ref = GrowFactors()
259
257
  # check tax_year validity
260
- max_tax_year = gfactors_base.last_year
258
+ max_tax_year = gfactors_bas.last_year
261
259
  if tax_year > max_tax_year:
262
260
  msg = f'TAXYEAR={tax_year} is greater than {max_tax_year}'
263
261
  self.errmsg += f'ERROR: {msg}\n'
@@ -278,35 +276,34 @@ class TaxCalcIO():
278
276
  # tax_year out of valid range means cannot proceed with calculations
279
277
  if self.errmsg:
280
278
  return
281
- # get policy parameter dictionary from --baseline file
282
- basedict = Calculator.read_json_param_objects(baseline, None)
283
279
  # get assumption sub-dictionaries
284
- paramdict = Calculator.read_json_param_objects(None, assump)
280
+ assumpdict = Calculator.read_json_param_objects(None, assump)
281
+ # get policy parameter dictionaries from --baseline file(s)
282
+ poldicts_bas = []
283
+ if self.specified_baseline:
284
+ for bas in baseline.split('+'):
285
+ pdict = Calculator.read_json_param_objects(bas, None)
286
+ poldicts_bas.append(pdict['policy'])
285
287
  # get policy parameter dictionaries from --reform file(s)
286
- policydicts = []
288
+ poldicts_ref = []
287
289
  if self.specified_reform:
288
- reforms = reform.split('+')
289
- for ref in reforms:
290
+ for ref in reform.split('+'):
290
291
  pdict = Calculator.read_json_param_objects(ref, None)
291
- policydicts.append(pdict['policy'])
292
- paramdict['policy'] = policydicts[0]
293
- # remember parameters for reform documentation
294
- self.param_dict = paramdict
295
- self.policy_dicts = policydicts
292
+ poldicts_ref.append(pdict['policy'])
296
293
  # set last_b_year
297
294
  last_b_year = max(tax_year, Policy.LAST_BUDGET_YEAR)
298
295
  # create gdiff_baseline object
299
296
  gdiff_baseline = GrowDiff(last_budget_year=last_b_year)
300
297
  try:
301
- gdiff_baseline.update_growdiff(paramdict['growdiff_baseline'])
298
+ gdiff_baseline.update_growdiff(assumpdict['growdiff_baseline'])
302
299
  except paramtools.ValidationError as valerr_msg:
303
300
  self.errmsg += str(valerr_msg)
304
- # apply gdiff_baseline to gfactor_base
305
- gdiff_baseline.apply_to(gfactors_base)
301
+ # apply gdiff_baseline to gfactor_bas
302
+ gdiff_baseline.apply_to(gfactors_bas)
306
303
  # specify gdiff_response object
307
304
  gdiff_response = GrowDiff(last_budget_year=last_b_year)
308
305
  try:
309
- gdiff_response.update_growdiff(paramdict['growdiff_response'])
306
+ gdiff_response.update_growdiff(assumpdict['growdiff_response'])
310
307
  except paramtools.ValidationError as valerr_msg:
311
308
  self.errmsg += str(valerr_msg)
312
309
  # apply gdiff_baseline and gdiff_response to gfactor_ref
@@ -315,57 +312,79 @@ class TaxCalcIO():
315
312
  self.gf_reform = copy.deepcopy(gfactors_ref)
316
313
  # create Policy objects:
317
314
  # ... the baseline Policy object
318
- base = Policy(gfactors=gfactors_base, last_budget_year=last_b_year)
319
- try:
320
- base.implement_reform(basedict['policy'],
321
- print_warnings=True,
322
- raise_errors=False)
323
- for _, errors in base.parameter_errors.items():
324
- self.errmsg += "\n".join(errors)
325
- except paramtools.ValidationError as valerr_msg:
326
- self.errmsg += str(valerr_msg)
315
+ if self.specified_baseline:
316
+ pol_bas = Policy(
317
+ gfactors=gfactors_bas,
318
+ last_budget_year=last_b_year,
319
+ )
320
+ for poldict in poldicts_bas:
321
+ try:
322
+ pol_bas.implement_reform(
323
+ poldict,
324
+ print_warnings=True,
325
+ raise_errors=False,
326
+ )
327
+ if self.errmsg:
328
+ self.errmsg += "\n"
329
+ for _, errors in pol_bas.parameter_errors.items():
330
+ self.errmsg += "\n".join(errors)
331
+ except paramtools.ValidationError as valerr_msg:
332
+ self.errmsg += str(valerr_msg)
333
+ else:
334
+ pol_bas = Policy(
335
+ gfactors=gfactors_bas,
336
+ last_budget_year=last_b_year,
337
+ )
327
338
  # ... the reform Policy object
328
339
  if self.specified_reform:
329
- pol = Policy(gfactors=gfactors_ref, last_budget_year=last_b_year)
330
- for poldict in policydicts:
340
+ pol_ref = Policy(
341
+ gfactors=gfactors_ref,
342
+ last_budget_year=last_b_year,
343
+ )
344
+ for poldict in poldicts_ref:
331
345
  try:
332
- pol.implement_reform(poldict,
333
- print_warnings=True,
334
- raise_errors=False)
346
+ pol_ref.implement_reform(
347
+ poldict,
348
+ print_warnings=True,
349
+ raise_errors=False,
350
+ )
335
351
  if self.errmsg:
336
352
  self.errmsg += "\n"
337
- for _, errors in pol.parameter_errors.items():
353
+ for _, errors in pol_ref.parameter_errors.items():
338
354
  self.errmsg += "\n".join(errors)
339
355
  except paramtools.ValidationError as valerr_msg:
340
356
  self.errmsg += str(valerr_msg)
341
357
  else:
342
- pol = Policy(gfactors=gfactors_base, last_budget_year=last_b_year)
358
+ pol_ref = Policy(
359
+ gfactors=gfactors_bas,
360
+ last_budget_year=last_b_year,
361
+ )
343
362
  # create Consumption object
344
363
  con = Consumption(last_budget_year=last_b_year)
345
364
  try:
346
- con.update_consumption(paramdict['consumption'])
365
+ con.update_consumption(assumpdict['consumption'])
347
366
  except paramtools.ValidationError as valerr_msg:
348
367
  self.errmsg += str(valerr_msg)
349
368
  # any errors imply cannot proceed with calculations
350
369
  if self.errmsg:
351
370
  return
352
371
  # set policy to tax_year
353
- pol.set_year(tax_year)
354
- base.set_year(tax_year)
372
+ pol_ref.set_year(tax_year)
373
+ pol_bas.set_year(tax_year)
355
374
  # read input file contents into Records objects
356
375
  if aging_input_data:
357
376
  if self.cps_input_data:
358
- recs = Records.cps_constructor(
377
+ recs_ref = Records.cps_constructor(
359
378
  gfactors=gfactors_ref,
360
- exact_calculations=exact_calculations
379
+ exact_calculations=exact_calculations,
361
380
  )
362
- recs_base = Records.cps_constructor(
363
- gfactors=gfactors_base,
364
- exact_calculations=exact_calculations
381
+ recs_bas = Records.cps_constructor(
382
+ gfactors=gfactors_bas,
383
+ exact_calculations=exact_calculations,
365
384
  )
366
385
  elif self.tmd_input_data: # pragma: no cover
367
386
  wghts = pd.read_csv(self.tmd_weights)
368
- recs = Records(
387
+ recs_ref = Records(
369
388
  data=pd.read_csv(input_data),
370
389
  start_year=Records.TMDCSV_YEAR,
371
390
  weights=wghts,
@@ -374,99 +393,97 @@ class TaxCalcIO():
374
393
  exact_calculations=exact_calculations,
375
394
  weights_scale=1.0,
376
395
  )
377
- recs_base = Records(
396
+ recs_bas = Records(
378
397
  data=pd.read_csv(input_data),
379
398
  start_year=Records.TMDCSV_YEAR,
380
399
  weights=wghts,
381
- gfactors=gfactors_base,
400
+ gfactors=gfactors_bas,
382
401
  adjust_ratios=None,
383
402
  exact_calculations=exact_calculations,
384
403
  weights_scale=1.0,
385
404
  )
386
405
  else: # if not {cps|tmd}_input_data but aging_input_data: puf
387
- recs = Records(
406
+ recs_ref = Records(
388
407
  data=input_data,
389
408
  gfactors=gfactors_ref,
390
409
  exact_calculations=exact_calculations
391
410
  )
392
- recs_base = Records(
411
+ recs_bas = Records(
393
412
  data=input_data,
394
- gfactors=gfactors_base,
413
+ gfactors=gfactors_bas,
395
414
  exact_calculations=exact_calculations
396
415
  )
397
416
  else: # input_data are raw data that are not being aged
398
- recs = Records(data=input_data,
399
- start_year=tax_year,
400
- gfactors=None,
401
- weights=None,
402
- adjust_ratios=None,
403
- exact_calculations=exact_calculations)
404
- recs_base = copy.deepcopy(recs)
417
+ recs_ref = Records(
418
+ data=input_data,
419
+ start_year=tax_year,
420
+ gfactors=None,
421
+ weights=None,
422
+ adjust_ratios=None,
423
+ exact_calculations=exact_calculations,
424
+ )
425
+ recs_bas = copy.deepcopy(recs_ref)
405
426
  # create Calculator objects
406
- self.calc = Calculator(policy=pol, records=recs,
407
- verbose=True,
408
- consumption=con,
409
- sync_years=aging_input_data)
410
- self.calc_base = Calculator(policy=base, records=recs_base,
411
- verbose=False,
412
- consumption=con,
413
- sync_years=aging_input_data)
414
-
415
- def custom_dump_variables(self, tcdumpvars_str):
416
- """
417
- Return set of variable names extracted from tcdumpvars_str, which
418
- contains the contents of the tcdumpvars file in the current directory.
419
- Also, builds self.errmsg if any custom variables are not valid.
420
- """
421
- assert isinstance(tcdumpvars_str, str)
422
- self.errmsg = ''
423
- # change some common delimiter characters into spaces
424
- dump_vars_str = tcdumpvars_str.replace(',', ' ')
425
- dump_vars_str = dump_vars_str.replace(';', ' ')
426
- dump_vars_str = dump_vars_str.replace('|', ' ')
427
- # split dump_vars_str into a list of dump variables
428
- dump_vars_list = dump_vars_str.split()
429
- # check that all dump_vars_list items are valid
430
- recs_vinfo = Records(data=None) # contains records VARINFO only
431
- valid_set = recs_vinfo.USABLE_READ_VARS | recs_vinfo.CALCULATED_VARS
432
- for var in dump_vars_list:
433
- if var not in valid_set:
434
- msg = f'invalid variable name in tcdumpvars file: {var}'
435
- self.errmsg += f'ERROR: {msg}\n'
436
- # add essential variables even if not on custom list
437
- if 'RECID' not in dump_vars_list:
438
- dump_vars_list.append('RECID')
439
- if 'FLPDYR' not in dump_vars_list:
440
- dump_vars_list.append('FLPDYR')
441
- # convert list into a set and return
442
- return set(dump_vars_list)
427
+ self.calc_ref = Calculator(
428
+ policy=pol_ref,
429
+ records=recs_ref,
430
+ verbose=(not self.silent),
431
+ consumption=con,
432
+ sync_years=aging_input_data,
433
+ )
434
+ self.calc_bas = Calculator(
435
+ policy=pol_bas,
436
+ records=recs_bas,
437
+ verbose=False,
438
+ consumption=con,
439
+ sync_years=aging_input_data,
440
+ )
443
441
 
444
442
  def tax_year(self):
445
443
  """
446
444
  Return calendar year for which TaxCalcIO calculations are being done.
447
445
  """
448
- return self.calc.current_year
446
+ return self.calc_ref.current_year
449
447
 
450
448
  def output_filepath(self):
451
449
  """
452
450
  Return full path to output file named in TaxCalcIO constructor.
453
451
  """
454
452
  dirpath = os.path.abspath(os.path.dirname(__file__))
455
- return os.path.join(dirpath, self._output_filename)
456
-
457
- def analyze(self, writing_output_file=False,
458
- output_tables=False,
459
- output_graphs=False,
460
- dump_varset=None,
461
- output_dump=False,
462
- output_sqldb=False):
453
+ return os.path.join(dirpath, self.output_filename)
454
+
455
+ def advance_to_year(self, year, aging_data):
456
+ """
457
+ Update self.output_filename and advance Calculator objects to year.
458
+ """
459
+ # update self.output_filename and delete output files
460
+ parts = self.output_filename.split('-')
461
+ parts[1] = str(year)[2:]
462
+ self.output_filename = '-'.join(parts)
463
+ self.delete_output_files()
464
+ # advance baseline and reform Calculator objects to specified year
465
+ self.calc_bas.advance_to_year(year)
466
+ self.calc_ref.advance_to_year(year)
467
+ idata = 'Advance input data and ' if aging_data else 'Advance'
468
+ if not self.silent:
469
+ print(f'{idata} policy to {year}')
470
+
471
+ def analyze(
472
+ self,
473
+ output_params=False,
474
+ output_tables=False,
475
+ output_graphs=False,
476
+ output_dump=False,
477
+ dump_varlist=None,
478
+ ):
463
479
  """
464
480
  Conduct tax analysis.
465
481
 
466
482
  Parameters
467
483
  ----------
468
- writing_output_file: boolean
469
- whether or not to generate and write output file
484
+ output_params: boolean
485
+ whether or not to write baseline and reform policy parameter
486
+ values to separate text files
470
487
 
471
488
  output_tables: boolean
472
489
  whether or not to generate and write distributional tables
@@ -476,18 +493,13 @@ class TaxCalcIO():
476
493
  whether or not to generate and write HTML graphs of average
477
494
  and marginal tax rates by income percentile
478
495
 
479
- dump_varset: set
480
- custom set of variables to include in dump and sqldb output;
481
- None implies include all variables in dump and sqldb output
482
-
483
496
  output_dump: boolean
484
- whether or not to replace standard output with all input and
485
- calculated variables using their Tax-Calculator names
497
+ whether or not to write SQLite3 database with baseline and
498
+ reform tables each containing the variables in dump_varlist.
486
499
 
487
- output_sqldb: boolean
488
- whether or not to write SQLite3 database with two tables
489
- (baseline and reform) each containing same output as written
490
- by output_dump to a csv file
500
+ dump_varlist: list
501
+ list of variables to include in dumpdb output;
502
+ list must include at least one variable.
491
503
 
492
504
  Returns
493
505
  -------
@@ -495,137 +507,77 @@ class TaxCalcIO():
495
507
  """
496
508
  # pylint: disable=too-many-arguments,too-many-positional-arguments
497
509
  # pylint: disable=too-many-branches,too-many-locals
498
- if self.puf_input_data and self.calc.reform_warnings:
499
- warn = 'PARAMETER VALUE WARNING(S): {}\n{}{}' # pragma: no cover
500
- print( # pragma: no cover
501
- warn.format('(read documentation for each parameter)',
502
- self.calc.reform_warnings,
503
- 'CONTINUING WITH CALCULATIONS...')
504
- )
505
- calc_base_calculated = False
506
- self.calc.calc_all()
507
- if output_dump or output_sqldb:
508
- # might need marginal tax rates
509
- (mtr_paytax, mtr_inctax,
510
- _) = self.calc.mtr(wrt_full_compensation=False,
511
- calc_all_already_called=True)
512
- self.calc_base.calc_all()
513
- calc_base_calculated = True
514
- (mtr_paytax_base, mtr_inctax_base,
515
- _) = self.calc_base.mtr(wrt_full_compensation=False,
516
- calc_all_already_called=True)
510
+ doing_calcs = output_tables or output_graphs or output_dump
511
+ # optionally write --params output to text files
512
+ if output_params:
513
+ self.write_policy_params_files()
514
+ if not doing_calcs:
515
+ return
516
+ # do output calculations
517
+ self.calc_bas.calc_all()
518
+ self.calc_ref.calc_all()
519
+ if output_dump:
520
+ assert isinstance(dump_varlist, list)
521
+ assert len(dump_varlist) > 0
522
+ # might need marginal tax rates for dumpdb
523
+ (mtr_ptax_ref, mtr_itax_ref,
524
+ _) = self.calc_ref.mtr(wrt_full_compensation=False,
525
+ calc_all_already_called=True)
526
+ (mtr_ptax_bas, mtr_itax_bas,
527
+ _) = self.calc_bas.mtr(wrt_full_compensation=False,
528
+ calc_all_already_called=True)
517
529
  else:
518
- # definitely do not need marginal tax rates
519
- mtr_paytax = None
520
- mtr_inctax = None
521
- mtr_paytax_base = None
522
- mtr_inctax_base = None
523
- # extract output if writing_output_file
524
- if writing_output_file:
525
- self.write_output_file(output_dump, dump_varset,
526
- mtr_paytax, mtr_inctax)
527
- self.write_doc_file()
528
- # optionally write --sqldb output to SQLite3 database
529
- if output_sqldb:
530
- self.write_sqldb_file(
531
- dump_varset, mtr_paytax, mtr_inctax,
532
- mtr_paytax_base, mtr_inctax_base
533
- )
530
+ # do not need marginal tax rates for dumpdb
531
+ mtr_ptax_ref = None
532
+ mtr_itax_ref = None
533
+ mtr_ptax_bas = None
534
+ mtr_itax_bas = None
534
535
  # optionally write --tables output to text file
535
536
  if output_tables:
536
- if not calc_base_calculated:
537
- self.calc_base.calc_all()
538
- calc_base_calculated = True
539
537
  self.write_tables_file()
540
538
  # optionally write --graphs output to HTML files
541
539
  if output_graphs:
542
- if not calc_base_calculated:
543
- self.calc_base.calc_all()
544
- calc_base_calculated = True
545
540
  self.write_graph_files()
546
-
547
- def write_output_file(self, output_dump, dump_varset,
548
- mtr_paytax, mtr_inctax):
549
- """
550
- Write output to CSV-formatted file.
551
- """
541
+ # optionally write --dumpdb output to SQLite database file
552
542
  if output_dump:
553
- outdf = self.dump_output(
554
- self.calc, dump_varset, mtr_inctax, mtr_paytax
543
+ self.write_dumpdb_file(
544
+ dump_varlist,
545
+ mtr_ptax_ref, mtr_itax_ref,
546
+ mtr_ptax_bas, mtr_itax_bas,
555
547
  )
556
- column_order = sorted(outdf.columns)
557
- # place RECID at start of column_order list
558
- assert 'RECID' in column_order, 'RECID not in dump output list'
559
- column_order.remove('RECID')
560
- column_order.insert(0, 'RECID')
561
- weight_vname = 's006'
562
- else:
563
- outdf = self.minimal_output()
564
- column_order = outdf.columns
565
- weight_vname = 'WEIGHT'
566
- assert len(outdf.index) == self.calc.array_len
567
- if self.tmd_input_data: # pragma: no cover
568
- if weight_vname in outdf:
569
- weights = outdf[weight_vname].round(5)
570
- outdf = outdf.round(2)
571
- if weight_vname in outdf:
572
- outdf[weight_vname] = weights
573
- outdf.to_csv(self._output_filename, columns=column_order,
574
- index=False)
575
- else:
576
- outdf.to_csv(self._output_filename, columns=column_order,
577
- index=False, float_format='%.2f')
578
- del outdf
579
- gc.collect()
580
548
 
581
- def write_doc_file(self):
549
+ def write_policy_params_files(self):
582
550
  """
583
- Write reform documentation to text file.
551
+ Write baseline and reform policy parameter values to separate files.
584
552
  """
585
- if len(self.policy_dicts) <= 1:
586
- doc = Calculator.reform_documentation(
587
- self.param_dict, self.gf_reform
553
+ param_names = Policy.parameter_list()
554
+ fname = self.output_filename.replace('.xxx', '-params.bas')
555
+ with open(fname, 'w', encoding='utf-8') as pfile:
556
+ for pname in param_names:
557
+ pval = self.calc_bas.policy_param(pname)
558
+ pfile.write(f'{pname} {pval}\n')
559
+ if not self.silent:
560
+ print( # pragma: no cover
561
+ f'Write baseline policy parameter values to file {fname}'
588
562
  )
589
- else:
590
- doc = Calculator.reform_documentation(
591
- self.param_dict, self.gf_reform, self.policy_dicts[1:]
563
+ fname = self.output_filename.replace('.xxx', '-params.ref')
564
+ with open(fname, 'w', encoding='utf-8') as pfile:
565
+ for pname in param_names:
566
+ pval = self.calc_ref.policy_param(pname)
567
+ pfile.write(f'{pname} {pval}\n')
568
+ if not self.silent:
569
+ print( # pragma: no cover
570
+ f'Write reform policy parameter values to file {fname}'
592
571
  )
593
- doc_fname = self._output_filename.replace('.csv', '-doc.text')
594
- with open(doc_fname, 'w', encoding='utf-8') as dfile:
595
- dfile.write(doc)
596
-
597
- def write_sqldb_file(self, dump_varset, mtr_paytax, mtr_inctax,
598
- mtr_paytax_base, mtr_inctax_base):
599
- """
600
- Write dump output to SQLite3 database table dump.
601
- """
602
- # pylint: disable=too-many-arguments,too-many-positional-arguments
603
- db_fname = self._output_filename.replace('.csv', '.db')
604
- dbcon = sqlite3.connect(db_fname)
605
- # write baseline table
606
- outdf = self.dump_output(
607
- self.calc_base, dump_varset, mtr_inctax_base, mtr_paytax_base
608
- )
609
- assert len(outdf.index) == self.calc.array_len
610
- outdf.to_sql('baseline', dbcon, if_exists='replace', index=False)
611
- # write reform table
612
- outdf = self.dump_output(
613
- self.calc, dump_varset, mtr_inctax, mtr_paytax
614
- )
615
- assert len(outdf.index) == self.calc.array_len
616
- outdf.to_sql('reform', dbcon, if_exists='replace', index=False)
617
- dbcon.close()
618
- del outdf
619
- gc.collect()
620
572
 
621
573
  def write_tables_file(self):
622
574
  """
623
575
  Write tables to text file.
624
576
  """
625
577
  # pylint: disable=too-many-locals
626
- tab_fname = self._output_filename.replace('.csv', '-tab.text')
578
+ tab_fname = self.output_filename.replace('.xxx', '-tables.text')
627
579
  # skip tables if there are not some positive weights
628
- if self.calc_base.total_weight() <= 0.:
580
+ if self.calc_bas.total_weight() <= 0.:
629
581
  with open(tab_fname, 'w', encoding='utf-8') as tfile:
630
582
  msg = 'No tables because sum of weights is not positive\n'
631
583
  tfile.write(msg)
@@ -634,16 +586,16 @@ class TaxCalcIO():
634
586
  # - weights don't change with reform
635
587
  # - expanded_income may change, so always use baseline expanded income
636
588
  nontax_vars = ['s006', 'expanded_income']
637
- nontax = [self.calc_base.array(var) for var in nontax_vars]
589
+ nontax = [self.calc_bas.array(var) for var in nontax_vars]
638
590
  # create list of results for tax variables from reform Calculator
639
591
  tax_vars = ['iitax', 'payrolltax', 'lumpsum_tax', 'combined']
640
- reform = [self.calc.array(var) for var in tax_vars]
592
+ reform = [self.calc_ref.array(var) for var in tax_vars]
641
593
  # create DataFrame with tax distribution under reform
642
594
  dist = nontax + reform # using expanded_income under baseline policy
643
595
  all_vars = nontax_vars + tax_vars
644
596
  distdf = pd.DataFrame(data=np.column_stack(dist), columns=all_vars)
645
597
  # create DataFrame with tax differences (reform - baseline)
646
- base = [self.calc_base.array(var) for var in tax_vars]
598
+ base = [self.calc_bas.array(var) for var in tax_vars]
647
599
  change = [(reform[idx] - base[idx]) for idx in range(0, len(tax_vars))]
648
600
  diff = nontax + change # using expanded_income under baseline policy
649
601
  diffdf = pd.DataFrame(data=np.column_stack(diff), columns=all_vars)
@@ -656,6 +608,10 @@ class TaxCalcIO():
656
608
  del distdf
657
609
  del diffdf
658
610
  gc.collect()
611
+ if not self.silent:
612
+ print( # pragma: no cover
613
+ f'Write tabular output to file {tab_fname}'
614
+ )
659
615
 
660
616
  @staticmethod
661
617
  def write_decile_table(dfx, tfile, tkind='Totals'):
@@ -737,32 +693,32 @@ class TaxCalcIO():
737
693
  Write graphs to HTML files.
738
694
  All graphs contain same number of filing units in each quantile.
739
695
  """
740
- pos_wght_sum = self.calc.total_weight() > 0.0
696
+ pos_wght_sum = self.calc_ref.total_weight() > 0.0
741
697
  fig = None
742
698
  # percentage-aftertax-income-change graph
743
- pch_fname = self._output_filename.replace('.csv', '-pch.html')
699
+ pch_fname = self.output_filename.replace('.xxx', '-pch.html')
744
700
  pch_title = 'PCH by Income Percentile'
745
701
  if pos_wght_sum:
746
- fig = self.calc_base.pch_graph(self.calc, pop_quantiles=False)
702
+ fig = self.calc_bas.pch_graph(self.calc_ref, pop_quantiles=False)
747
703
  write_graph_file(fig, pch_fname, pch_title)
748
704
  else:
749
705
  reason = 'No graph because sum of weights is not positive'
750
706
  TaxCalcIO.write_empty_graph_file(pch_fname, pch_title, reason)
751
707
  # average-tax-rate graph
752
- atr_fname = self._output_filename.replace('.csv', '-atr.html')
708
+ atr_fname = self.output_filename.replace('.xxx', '-atr.html')
753
709
  atr_title = 'ATR by Income Percentile'
754
710
  if pos_wght_sum:
755
- fig = self.calc_base.atr_graph(self.calc, pop_quantiles=False)
711
+ fig = self.calc_bas.atr_graph(self.calc_ref, pop_quantiles=False)
756
712
  write_graph_file(fig, atr_fname, atr_title)
757
713
  else:
758
714
  reason = 'No graph because sum of weights is not positive'
759
715
  TaxCalcIO.write_empty_graph_file(atr_fname, atr_title, reason)
760
716
  # marginal-tax-rate graph
761
- mtr_fname = self._output_filename.replace('.csv', '-mtr.html')
717
+ mtr_fname = self.output_filename.replace('.xxx', '-mtr.html')
762
718
  mtr_title = 'MTR by Income Percentile'
763
719
  if pos_wght_sum:
764
- fig = self.calc_base.mtr_graph(
765
- self.calc,
720
+ fig = self.calc_bas.mtr_graph(
721
+ self.calc_ref,
766
722
  alt_e00200p_text='Taxpayer Earnings',
767
723
  pop_quantiles=False
768
724
  )
@@ -773,6 +729,12 @@ class TaxCalcIO():
773
729
  if fig:
774
730
  del fig
775
731
  gc.collect()
732
+ if not self.silent:
733
+ print( # pragma: no cover
734
+ f'Write graphical output to file {pch_fname}\n'
735
+ f'Write graphical output to file {atr_fname}\n'
736
+ f'Write graphical output to file {mtr_fname}'
737
+ )
776
738
 
777
739
  @staticmethod
778
740
  def write_empty_graph_file(fname, title, reason):
@@ -788,45 +750,108 @@ class TaxCalcIO():
788
750
  with open(fname, 'w', encoding='utf-8') as gfile:
789
751
  gfile.write(txt)
790
752
 
791
- def minimal_output(self):
753
+ BASE_DUMPVARS = [
754
+ 'RECID',
755
+ 's006',
756
+ 'data_source',
757
+ 'XTOT',
758
+ 'MARS',
759
+ 'expanded_income',
760
+ ]
761
+ MINIMAL_DUMPVARS = [
762
+ 'RECID',
763
+ 'iitax',
764
+ ]
765
+ MTR_DUMPVARS = [
766
+ 'mtr_itax',
767
+ 'mtr_ptax',
768
+ ]
769
+
770
+ def dump_variables(self, dumpvars_str):
792
771
  """
793
- Extract minimal output and return it as Pandas DataFrame.
772
+ Return list of variable names extracted from dumpvars_str, plus
773
+ minimal baseline/reform variables even if not in dumpvars_str.
774
+ Also, builds self.errmsg if any specified variables are not valid.
794
775
  """
795
- varlist = ['RECID', 'YEAR', 'WEIGHT', 'INCTAX', 'LSTAX', 'PAYTAX']
796
- odict = {}
797
- scalc = self.calc
798
- odict['RECID'] = scalc.array('RECID') # id for tax filing unit
799
- odict['YEAR'] = self.tax_year() # tax calculation year
800
- odict['WEIGHT'] = scalc.array('s006') # sample weight
801
- odict['INCTAX'] = scalc.array('iitax') # federal income taxes
802
- odict['LSTAX'] = scalc.array('lumpsum_tax') # lump-sum tax
803
- odict['PAYTAX'] = scalc.array('payrolltax') # payroll taxes (ee+er)
804
- odf = pd.DataFrame(data=odict, columns=varlist)
805
- return odf
806
-
807
- def dump_output(self, calcx, dump_varset, mtr_inctax, mtr_paytax):
776
+ assert isinstance(dumpvars_str, str)
777
+ self.errmsg = ''
778
+ # change some common non-space delimiter characters into spaces
779
+ dumpvars_str = dumpvars_str.replace(',', ' ')
780
+ dumpvars_str = dumpvars_str.replace(';', ' ')
781
+ dumpvars_str = dumpvars_str.replace('|', ' ')
782
+ # split dumpvars_str into a set of dump variables
783
+ dumpvars = dumpvars_str.split()
784
+ # check that all dumpvars items are valid
785
+ recs_vinfo = Records(data=None) # contains records VARINFO only
786
+ valid_set = recs_vinfo.USABLE_READ_VARS | recs_vinfo.CALCULATED_VARS
787
+ for var in dumpvars:
788
+ if var not in valid_set and var not in TaxCalcIO.MTR_DUMPVARS:
789
+ msg = f'invalid variable name {var} in DUMPVARS file'
790
+ self.errmsg += f'ERROR: {msg}\n'
791
+ if self.errmsg:
792
+ return []
793
+ # construct variable list
794
+ dumpvars_list = TaxCalcIO.MINIMAL_DUMPVARS
795
+ for var in dumpvars:
796
+ if var not in dumpvars_list and var not in TaxCalcIO.BASE_DUMPVARS:
797
+ dumpvars_list.append(var)
798
+ return dumpvars_list
799
+
800
+ def write_dumpdb_file(
801
+ self,
802
+ dump_varlist,
803
+ mtr_ptax_ref, mtr_itax_ref,
804
+ mtr_ptax_bas, mtr_itax_bas,
805
+ ):
808
806
  """
809
- Extract dump output and return it as Pandas DataFrame.
807
+ Write dump output to SQLite database file.
810
808
  """
811
- recs_vinfo = Records(data=None) # contains only Records VARINFO
812
- if dump_varset is None:
813
- varset = recs_vinfo.USABLE_READ_VARS | recs_vinfo.CALCULATED_VARS
814
- else:
815
- varset = dump_varset
816
- # create and return dump output DataFrame
817
- odf = pd.DataFrame()
818
- for varname in varset:
819
- vardata = calcx.array(varname)
820
- if varname in recs_vinfo.INTEGER_VARS:
821
- odf[varname] = vardata
822
- else: # specify precision that can handle small TMD area weights
823
- odf[varname] = vardata.round(5)
824
- odf = odf.copy()
825
- # specify mtr values in percentage terms
826
- if 'mtr_inctax' in varset:
827
- odf['mtr_inctax'] = (mtr_inctax * 100).round(2)
828
- if 'mtr_paytax' in varset:
829
- odf['mtr_paytax'] = (mtr_paytax * 100).round(2)
830
- # specify tax calculation year
831
- odf['FLPDYR'] = self.tax_year()
832
- return odf
809
+ # pylint: disable=too-many-arguments,too-many-positional-arguments
810
+ def dump_output(calcx, dumpvars, mtr_itax, mtr_ptax):
811
+ """
812
+ Extract dump output from calcx and return it as Pandas DataFrame.
813
+ """
814
+ odf = pd.DataFrame()
815
+ for var in dumpvars:
816
+ if var in TaxCalcIO.MTR_DUMPVARS:
817
+ if var == 'mtr_itax':
818
+ odf[var] = mtr_itax
819
+ elif var == 'mtr_ptax':
820
+ odf[var] = mtr_ptax
821
+ else:
822
+ odf[var] = calcx.array(var)
823
+ return odf
824
+ # begin main logic
825
+ assert isinstance(dump_varlist, list)
826
+ assert len(dump_varlist) > 0
827
+ db_fname = self.output_filename.replace('.xxx', '.db')
828
+ dbcon = sqlite3.connect(db_fname)
829
+ # write base table
830
+ outdf = pd.DataFrame()
831
+ for var in TaxCalcIO.BASE_DUMPVARS:
832
+ outdf[var] = self.calc_bas.array(var)
833
+ outdf['income_group'] = 0
834
+ assert len(outdf.index) == self.calc_bas.array_len
835
+ outdf.to_sql('base', dbcon, index=False)
836
+ del outdf
837
+ # write baseline table
838
+ outdf = dump_output(
839
+ self.calc_bas, dump_varlist, mtr_itax_bas, mtr_ptax_bas,
840
+ )
841
+ assert len(outdf.index) == self.calc_bas.array_len
842
+ outdf.to_sql('baseline', dbcon, index=False)
843
+ del outdf
844
+ # write reform table
845
+ outdf = dump_output(
846
+ self.calc_ref, dump_varlist, mtr_itax_ref, mtr_ptax_ref,
847
+ )
848
+ assert len(outdf.index) == self.calc_ref.array_len
849
+ outdf.to_sql('reform', dbcon, index=False)
850
+ del outdf
851
+ dbcon.close()
852
+ del dbcon
853
+ gc.collect()
854
+ if not self.silent:
855
+ print( # pragma: no cover
856
+ f'Write dump output to sqlite3 database file {db_fname}'
857
+ )