taxcalc 4.5.0__py3-none-any.whl → 4.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
taxcalc/taxcalcio.py CHANGED
@@ -54,9 +54,8 @@ class TaxCalcIO():
54
54
  None implies economic assumptions are standard assumptions,
55
55
  or string is name of optional ASSUMP file.
56
56
 
57
- outdir: None or string
58
- None implies output files written to current directory,
59
- or string is name of optional output directory
57
+ silent: boolean
58
+ whether or not to suppress action messages.
60
59
 
61
60
  Returns
62
61
  -------
@@ -65,9 +64,10 @@ class TaxCalcIO():
65
64
  # pylint: disable=too-many-instance-attributes
66
65
 
67
66
  def __init__(self, input_data, tax_year, baseline, reform, assump,
68
- outdir=None):
67
+ silent=True):
69
68
  # pylint: disable=too-many-arguments,too-many-positional-arguments
70
69
  # pylint: disable=too-many-branches,too-many-statements,too-many-locals
70
+ self.silent = silent
71
71
  self.gf_reform = None
72
72
  self.errmsg = ''
73
73
  # check name and existence of INPUT file
@@ -189,44 +189,22 @@ class TaxCalcIO():
189
189
  else:
190
190
  msg = 'TaxCalcIO.ctor: assump is neither None nor str'
191
191
  self.errmsg += f'ERROR: {msg}\n'
192
- # check name and existence of OUTDIR
193
- if outdir is None:
194
- valid_outdir = True
195
- elif isinstance(outdir, str):
196
- # check existence of OUTDIR
197
- if os.path.isdir(outdir):
198
- valid_outdir = True
199
- else:
200
- valid_outdir = False
201
- msg = 'OUTDIR could not be found'
202
- self.errmsg += f'ERROR: {msg}\n'
203
- else:
204
- valid_outdir = False
205
- msg = 'TaxCalcIO.ctor: outdir is neither None nor str'
206
- self.errmsg += f'ERROR: {msg}\n'
207
192
  # create OUTPUT file name and delete any existing output files
208
- output_filename = f'{inp}{bas}{ref}{asm}.csv'
209
- if outdir is None:
210
- self._output_filename = output_filename
211
- delete_old_files = True
212
- elif valid_outdir:
213
- self._output_filename = os.path.join(outdir, output_filename)
214
- delete_old_files = True
215
- else:
216
- delete_old_files = False
217
- if delete_old_files:
218
- delete_file(self._output_filename)
219
- delete_file(self._output_filename.replace('.csv', '.db'))
220
- delete_file(self._output_filename.replace('.csv', '-doc.text'))
221
- delete_file(self._output_filename.replace('.csv', '-tab.text'))
222
- delete_file(self._output_filename.replace('.csv', '-atr.html'))
223
- delete_file(self._output_filename.replace('.csv', '-mtr.html'))
224
- delete_file(self._output_filename.replace('.csv', '-pch.html'))
193
+ self.output_filename = f'{inp}{bas}{ref}{asm}.xxx'
194
+ extensions = [
195
+ '-params.bas',
196
+ '-params.ref',
197
+ '-tables.text',
198
+ '-atr.html',
199
+ '-mtr.html',
200
+ '-pch.html',
201
+ '.db',
202
+ ]
203
+ for ext in extensions:
204
+ delete_file(self.output_filename.replace('.xxx', ext))
225
205
  # initialize variables whose values are set in init method
226
- self.calc = None
227
- self.calc_base = None
228
- self.param_dict = None
229
- self.policy_dicts = []
206
+ self.calc_ref = None
207
+ self.calc_bas = None
230
208
 
231
209
  def init(self, input_data, tax_year, baseline, reform, assump,
232
210
  aging_input_data, exact_calculations):
@@ -251,13 +229,13 @@ class TaxCalcIO():
251
229
  self.errmsg = ''
252
230
  # instantiate base and reform GrowFactors objects
253
231
  if self.tmd_input_data:
254
- gfactors_base = GrowFactors(self.tmd_gfactor) # pragma: no cover
232
+ gfactors_bas = GrowFactors(self.tmd_gfactor) # pragma: no cover
255
233
  gfactors_ref = GrowFactors(self.tmd_gfactor) # pragma: no cover
256
234
  else:
257
- gfactors_base = GrowFactors()
235
+ gfactors_bas = GrowFactors()
258
236
  gfactors_ref = GrowFactors()
259
237
  # check tax_year validity
260
- max_tax_year = gfactors_base.last_year
238
+ max_tax_year = gfactors_bas.last_year
261
239
  if tax_year > max_tax_year:
262
240
  msg = f'TAXYEAR={tax_year} is greater than {max_tax_year}'
263
241
  self.errmsg += f'ERROR: {msg}\n'
@@ -278,35 +256,30 @@ class TaxCalcIO():
278
256
  # tax_year out of valid range means cannot proceed with calculations
279
257
  if self.errmsg:
280
258
  return
281
- # get policy parameter dictionary from --baseline file
282
- basedict = Calculator.read_json_param_objects(baseline, None)
283
259
  # get assumption sub-dictionaries
284
- paramdict = Calculator.read_json_param_objects(None, assump)
260
+ assumpdict = Calculator.read_json_param_objects(None, assump)
261
+ # get policy parameter dictionary from --baseline file
262
+ poldict_bas = Calculator.read_json_param_objects(baseline, None)
285
263
  # get policy parameter dictionaries from --reform file(s)
286
- policydicts = []
264
+ poldicts_ref = []
287
265
  if self.specified_reform:
288
- reforms = reform.split('+')
289
- for ref in reforms:
266
+ for ref in reform.split('+'):
290
267
  pdict = Calculator.read_json_param_objects(ref, None)
291
- policydicts.append(pdict['policy'])
292
- paramdict['policy'] = policydicts[0]
293
- # remember parameters for reform documentation
294
- self.param_dict = paramdict
295
- self.policy_dicts = policydicts
268
+ poldicts_ref.append(pdict['policy'])
296
269
  # set last_b_year
297
270
  last_b_year = max(tax_year, Policy.LAST_BUDGET_YEAR)
298
271
  # create gdiff_baseline object
299
272
  gdiff_baseline = GrowDiff(last_budget_year=last_b_year)
300
273
  try:
301
- gdiff_baseline.update_growdiff(paramdict['growdiff_baseline'])
274
+ gdiff_baseline.update_growdiff(assumpdict['growdiff_baseline'])
302
275
  except paramtools.ValidationError as valerr_msg:
303
276
  self.errmsg += str(valerr_msg)
304
- # apply gdiff_baseline to gfactor_base
305
- gdiff_baseline.apply_to(gfactors_base)
277
+ # apply gdiff_baseline to gfactor_bas
278
+ gdiff_baseline.apply_to(gfactors_bas)
306
279
  # specify gdiff_response object
307
280
  gdiff_response = GrowDiff(last_budget_year=last_b_year)
308
281
  try:
309
- gdiff_response.update_growdiff(paramdict['growdiff_response'])
282
+ gdiff_response.update_growdiff(assumpdict['growdiff_response'])
310
283
  except paramtools.ValidationError as valerr_msg:
311
284
  self.errmsg += str(valerr_msg)
312
285
  # apply gdiff_baseline and gdiff_response to gfactor_ref
@@ -315,57 +288,70 @@ class TaxCalcIO():
315
288
  self.gf_reform = copy.deepcopy(gfactors_ref)
316
289
  # create Policy objects:
317
290
  # ... the baseline Policy object
318
- base = Policy(gfactors=gfactors_base, last_budget_year=last_b_year)
291
+ pol_bas = Policy(
292
+ gfactors=gfactors_bas,
293
+ last_budget_year=last_b_year,
294
+ )
319
295
  try:
320
- base.implement_reform(basedict['policy'],
321
- print_warnings=True,
322
- raise_errors=False)
323
- for _, errors in base.parameter_errors.items():
296
+ pol_bas.implement_reform(
297
+ poldict_bas['policy'],
298
+ print_warnings=True,
299
+ raise_errors=False,
300
+ )
301
+ for _, errors in pol_bas.parameter_errors.items():
324
302
  self.errmsg += "\n".join(errors)
325
303
  except paramtools.ValidationError as valerr_msg:
326
304
  self.errmsg += str(valerr_msg)
327
305
  # ... the reform Policy object
328
306
  if self.specified_reform:
329
- pol = Policy(gfactors=gfactors_ref, last_budget_year=last_b_year)
330
- for poldict in policydicts:
307
+ pol_ref = Policy(
308
+ gfactors=gfactors_ref,
309
+ last_budget_year=last_b_year,
310
+ )
311
+ for poldict in poldicts_ref:
331
312
  try:
332
- pol.implement_reform(poldict,
333
- print_warnings=True,
334
- raise_errors=False)
313
+ pol_ref.implement_reform(
314
+ poldict,
315
+ print_warnings=True,
316
+ raise_errors=False,
317
+ )
335
318
  if self.errmsg:
336
319
  self.errmsg += "\n"
337
- for _, errors in pol.parameter_errors.items():
320
+ for _, errors in pol_ref.parameter_errors.items():
338
321
  self.errmsg += "\n".join(errors)
339
322
  except paramtools.ValidationError as valerr_msg:
340
323
  self.errmsg += str(valerr_msg)
341
324
  else:
342
- pol = Policy(gfactors=gfactors_base, last_budget_year=last_b_year)
325
+ pol_ref = Policy(
326
+ gfactors=gfactors_bas,
327
+ last_budget_year=last_b_year,
328
+ )
343
329
  # create Consumption object
344
330
  con = Consumption(last_budget_year=last_b_year)
345
331
  try:
346
- con.update_consumption(paramdict['consumption'])
332
+ con.update_consumption(assumpdict['consumption'])
347
333
  except paramtools.ValidationError as valerr_msg:
348
334
  self.errmsg += str(valerr_msg)
349
335
  # any errors imply cannot proceed with calculations
350
336
  if self.errmsg:
351
337
  return
352
338
  # set policy to tax_year
353
- pol.set_year(tax_year)
354
- base.set_year(tax_year)
339
+ pol_ref.set_year(tax_year)
340
+ pol_bas.set_year(tax_year)
355
341
  # read input file contents into Records objects
356
342
  if aging_input_data:
357
343
  if self.cps_input_data:
358
- recs = Records.cps_constructor(
344
+ recs_ref = Records.cps_constructor(
359
345
  gfactors=gfactors_ref,
360
- exact_calculations=exact_calculations
346
+ exact_calculations=exact_calculations,
361
347
  )
362
- recs_base = Records.cps_constructor(
363
- gfactors=gfactors_base,
364
- exact_calculations=exact_calculations
348
+ recs_bas = Records.cps_constructor(
349
+ gfactors=gfactors_bas,
350
+ exact_calculations=exact_calculations,
365
351
  )
366
352
  elif self.tmd_input_data: # pragma: no cover
367
353
  wghts = pd.read_csv(self.tmd_weights)
368
- recs = Records(
354
+ recs_ref = Records(
369
355
  data=pd.read_csv(input_data),
370
356
  start_year=Records.TMDCSV_YEAR,
371
357
  weights=wghts,
@@ -374,99 +360,81 @@ class TaxCalcIO():
374
360
  exact_calculations=exact_calculations,
375
361
  weights_scale=1.0,
376
362
  )
377
- recs_base = Records(
363
+ recs_bas = Records(
378
364
  data=pd.read_csv(input_data),
379
365
  start_year=Records.TMDCSV_YEAR,
380
366
  weights=wghts,
381
- gfactors=gfactors_base,
367
+ gfactors=gfactors_bas,
382
368
  adjust_ratios=None,
383
369
  exact_calculations=exact_calculations,
384
370
  weights_scale=1.0,
385
371
  )
386
372
  else: # if not {cps|tmd}_input_data but aging_input_data: puf
387
- recs = Records(
373
+ recs_ref = Records(
388
374
  data=input_data,
389
375
  gfactors=gfactors_ref,
390
376
  exact_calculations=exact_calculations
391
377
  )
392
- recs_base = Records(
378
+ recs_bas = Records(
393
379
  data=input_data,
394
- gfactors=gfactors_base,
380
+ gfactors=gfactors_bas,
395
381
  exact_calculations=exact_calculations
396
382
  )
397
383
  else: # input_data are raw data that are not being aged
398
- recs = Records(data=input_data,
399
- start_year=tax_year,
400
- gfactors=None,
401
- weights=None,
402
- adjust_ratios=None,
403
- exact_calculations=exact_calculations)
404
- recs_base = copy.deepcopy(recs)
384
+ recs_ref = Records(
385
+ data=input_data,
386
+ start_year=tax_year,
387
+ gfactors=None,
388
+ weights=None,
389
+ adjust_ratios=None,
390
+ exact_calculations=exact_calculations,
391
+ )
392
+ recs_bas = copy.deepcopy(recs_ref)
405
393
  # create Calculator objects
406
- self.calc = Calculator(policy=pol, records=recs,
407
- verbose=True,
408
- consumption=con,
409
- sync_years=aging_input_data)
410
- self.calc_base = Calculator(policy=base, records=recs_base,
411
- verbose=False,
412
- consumption=con,
413
- sync_years=aging_input_data)
414
-
415
- def custom_dump_variables(self, tcdumpvars_str):
416
- """
417
- Return set of variable names extracted from tcdumpvars_str, which
418
- contains the contents of the tcdumpvars file in the current directory.
419
- Also, builds self.errmsg if any custom variables are not valid.
420
- """
421
- assert isinstance(tcdumpvars_str, str)
422
- self.errmsg = ''
423
- # change some common delimiter characters into spaces
424
- dump_vars_str = tcdumpvars_str.replace(',', ' ')
425
- dump_vars_str = dump_vars_str.replace(';', ' ')
426
- dump_vars_str = dump_vars_str.replace('|', ' ')
427
- # split dump_vars_str into a list of dump variables
428
- dump_vars_list = dump_vars_str.split()
429
- # check that all dump_vars_list items are valid
430
- recs_vinfo = Records(data=None) # contains records VARINFO only
431
- valid_set = recs_vinfo.USABLE_READ_VARS | recs_vinfo.CALCULATED_VARS
432
- for var in dump_vars_list:
433
- if var not in valid_set:
434
- msg = f'invalid variable name in tcdumpvars file: {var}'
435
- self.errmsg += f'ERROR: {msg}\n'
436
- # add essential variables even if not on custom list
437
- if 'RECID' not in dump_vars_list:
438
- dump_vars_list.append('RECID')
439
- if 'FLPDYR' not in dump_vars_list:
440
- dump_vars_list.append('FLPDYR')
441
- # convert list into a set and return
442
- return set(dump_vars_list)
394
+ self.calc_ref = Calculator(
395
+ policy=pol_ref,
396
+ records=recs_ref,
397
+ verbose=(not self.silent),
398
+ consumption=con,
399
+ sync_years=aging_input_data,
400
+ )
401
+ self.calc_bas = Calculator(
402
+ policy=pol_bas,
403
+ records=recs_bas,
404
+ verbose=False,
405
+ consumption=con,
406
+ sync_years=aging_input_data,
407
+ )
443
408
 
444
409
  def tax_year(self):
445
410
  """
446
411
  Return calendar year for which TaxCalcIO calculations are being done.
447
412
  """
448
- return self.calc.current_year
413
+ return self.calc_ref.current_year
449
414
 
450
415
  def output_filepath(self):
451
416
  """
452
417
  Return full path to output file named in TaxCalcIO constructor.
453
418
  """
454
419
  dirpath = os.path.abspath(os.path.dirname(__file__))
455
- return os.path.join(dirpath, self._output_filename)
456
-
457
- def analyze(self, writing_output_file=False,
458
- output_tables=False,
459
- output_graphs=False,
460
- dump_varset=None,
461
- output_dump=False,
462
- output_sqldb=False):
420
+ return os.path.join(dirpath, self.output_filename)
421
+
422
+ def analyze(
423
+ self,
424
+ output_params=False,
425
+ output_tables=False,
426
+ output_graphs=False,
427
+ output_dump=False,
428
+ dump_varlist=None,
429
+ ):
463
430
  """
464
431
  Conduct tax analysis.
465
432
 
466
433
  Parameters
467
434
  ----------
468
- writing_output_file: boolean
469
- whether or not to generate and write output file
435
+ output_params: boolean
436
+ whether or not to write baseline and reform policy parameter
437
+ values to separate text files
470
438
 
471
439
  output_tables: boolean
472
440
  whether or not to generate and write distributional tables
@@ -476,18 +444,13 @@ class TaxCalcIO():
476
444
  whether or not to generate and write HTML graphs of average
477
445
  and marginal tax rates by income percentile
478
446
 
479
- dump_varset: set
480
- custom set of variables to include in dump and sqldb output;
481
- None implies include all variables in dump and sqldb output
482
-
483
447
  output_dump: boolean
484
- whether or not to replace standard output with all input and
485
- calculated variables using their Tax-Calculator names
448
+ whether or not to write SQLite3 database with baseline and
449
+ reform tables each containing the variables in dump_varlist.
486
450
 
487
- output_sqldb: boolean
488
- whether or not to write SQLite3 database with two tables
489
- (baseline and reform) each containing same output as written
490
- by output_dump to a csv file
451
+ dump_varlist: list
452
+ list of variables to include in dumpdb output;
453
+ list must include at least one variable.
491
454
 
492
455
  Returns
493
456
  -------
@@ -495,137 +458,88 @@ class TaxCalcIO():
495
458
  """
496
459
  # pylint: disable=too-many-arguments,too-many-positional-arguments
497
460
  # pylint: disable=too-many-branches,too-many-locals
498
- if self.puf_input_data and self.calc.reform_warnings:
499
- warn = 'PARAMETER VALUE WARNING(S): {}\n{}{}' # pragma: no cover
461
+ if self.puf_input_data and self.calc_ref.reform_warnings:
500
462
  print( # pragma: no cover
501
- warn.format('(read documentation for each parameter)',
502
- self.calc.reform_warnings,
503
- 'CONTINUING WITH CALCULATIONS...')
463
+ 'PARAMETER VALUE WARNING(S): '
464
+ '(read documentation for each parameter)\n'
465
+ f'{self.calc_ref.reform_warnings}'
466
+ 'CONTINUING WITH CALCULATIONS...'
504
467
  )
505
468
  calc_base_calculated = False
506
- self.calc.calc_all()
507
- if output_dump or output_sqldb:
469
+ self.calc_ref.calc_all()
470
+ if output_dump:
471
+ assert isinstance(dump_varlist, list)
472
+ assert len(dump_varlist) > 0
508
473
  # might need marginal tax rates
509
- (mtr_paytax, mtr_inctax,
510
- _) = self.calc.mtr(wrt_full_compensation=False,
511
- calc_all_already_called=True)
512
- self.calc_base.calc_all()
474
+ (mtr_ptax_ref, mtr_itax_ref,
475
+ _) = self.calc_ref.mtr(wrt_full_compensation=False,
476
+ calc_all_already_called=True)
477
+ self.calc_bas.calc_all()
513
478
  calc_base_calculated = True
514
- (mtr_paytax_base, mtr_inctax_base,
515
- _) = self.calc_base.mtr(wrt_full_compensation=False,
516
- calc_all_already_called=True)
479
+ (mtr_ptax_bas, mtr_itax_bas,
480
+ _) = self.calc_bas.mtr(wrt_full_compensation=False,
481
+ calc_all_already_called=True)
517
482
  else:
518
483
  # definitely do not need marginal tax rates
519
- mtr_paytax = None
520
- mtr_inctax = None
521
- mtr_paytax_base = None
522
- mtr_inctax_base = None
523
- # extract output if writing_output_file
524
- if writing_output_file:
525
- self.write_output_file(output_dump, dump_varset,
526
- mtr_paytax, mtr_inctax)
527
- self.write_doc_file()
528
- # optionally write --sqldb output to SQLite3 database
529
- if output_sqldb:
530
- self.write_sqldb_file(
531
- dump_varset, mtr_paytax, mtr_inctax,
532
- mtr_paytax_base, mtr_inctax_base
533
- )
484
+ mtr_ptax_ref = None
485
+ mtr_itax_ref = None
486
+ mtr_ptax_bas = None
487
+ mtr_itax_bas = None
488
+ # optionally write --params output to text files
489
+ if output_params:
490
+ self.write_policy_params_files()
534
491
  # optionally write --tables output to text file
535
492
  if output_tables:
536
493
  if not calc_base_calculated:
537
- self.calc_base.calc_all()
494
+ self.calc_bas.calc_all()
538
495
  calc_base_calculated = True
539
496
  self.write_tables_file()
540
497
  # optionally write --graphs output to HTML files
541
498
  if output_graphs:
542
499
  if not calc_base_calculated:
543
- self.calc_base.calc_all()
500
+ self.calc_bas.calc_all()
544
501
  calc_base_calculated = True
545
502
  self.write_graph_files()
546
-
547
- def write_output_file(self, output_dump, dump_varset,
548
- mtr_paytax, mtr_inctax):
549
- """
550
- Write output to CSV-formatted file.
551
- """
503
+ # optionally write --dumpdb output to SQLite database file
552
504
  if output_dump:
553
- outdf = self.dump_output(
554
- self.calc, dump_varset, mtr_inctax, mtr_paytax
505
+ self.write_dumpdb_file(
506
+ dump_varlist,
507
+ mtr_ptax_ref, mtr_itax_ref,
508
+ mtr_ptax_bas, mtr_itax_bas,
555
509
  )
556
- column_order = sorted(outdf.columns)
557
- # place RECID at start of column_order list
558
- assert 'RECID' in column_order, 'RECID not in dump output list'
559
- column_order.remove('RECID')
560
- column_order.insert(0, 'RECID')
561
- weight_vname = 's006'
562
- else:
563
- outdf = self.minimal_output()
564
- column_order = outdf.columns
565
- weight_vname = 'WEIGHT'
566
- assert len(outdf.index) == self.calc.array_len
567
- if self.tmd_input_data: # pragma: no cover
568
- if weight_vname in outdf:
569
- weights = outdf[weight_vname].round(5)
570
- outdf = outdf.round(2)
571
- if weight_vname in outdf:
572
- outdf[weight_vname] = weights
573
- outdf.to_csv(self._output_filename, columns=column_order,
574
- index=False)
575
- else:
576
- outdf.to_csv(self._output_filename, columns=column_order,
577
- index=False, float_format='%.2f')
578
- del outdf
579
- gc.collect()
580
510
 
581
- def write_doc_file(self):
511
+ def write_policy_params_files(self):
582
512
  """
583
- Write reform documentation to text file.
513
+ Write baseline and reform policy parameter values to separate files.
584
514
  """
585
- if len(self.policy_dicts) <= 1:
586
- doc = Calculator.reform_documentation(
587
- self.param_dict, self.gf_reform
515
+ param_names = Policy.parameter_list()
516
+ fname = self.output_filename.replace('.xxx', '-params.bas')
517
+ with open(fname, 'w', encoding='utf-8') as pfile:
518
+ for pname in param_names:
519
+ pval = self.calc_bas.policy_param(pname)
520
+ pfile.write(f'{pname} {pval}\n')
521
+ if not self.silent:
522
+ print( # pragma: no cover
523
+ f'Write baseline policy parameter values to file {fname}'
588
524
  )
589
- else:
590
- doc = Calculator.reform_documentation(
591
- self.param_dict, self.gf_reform, self.policy_dicts[1:]
525
+ fname = self.output_filename.replace('.xxx', '-params.ref')
526
+ with open(fname, 'w', encoding='utf-8') as pfile:
527
+ for pname in param_names:
528
+ pval = self.calc_ref.policy_param(pname)
529
+ pfile.write(f'{pname} {pval}\n')
530
+ if not self.silent:
531
+ print( # pragma: no cover
532
+ f'Write reform policy parameter values to file {fname}'
592
533
  )
593
- doc_fname = self._output_filename.replace('.csv', '-doc.text')
594
- with open(doc_fname, 'w', encoding='utf-8') as dfile:
595
- dfile.write(doc)
596
-
597
- def write_sqldb_file(self, dump_varset, mtr_paytax, mtr_inctax,
598
- mtr_paytax_base, mtr_inctax_base):
599
- """
600
- Write dump output to SQLite3 database table dump.
601
- """
602
- # pylint: disable=too-many-arguments,too-many-positional-arguments
603
- db_fname = self._output_filename.replace('.csv', '.db')
604
- dbcon = sqlite3.connect(db_fname)
605
- # write baseline table
606
- outdf = self.dump_output(
607
- self.calc_base, dump_varset, mtr_inctax_base, mtr_paytax_base
608
- )
609
- assert len(outdf.index) == self.calc.array_len
610
- outdf.to_sql('baseline', dbcon, if_exists='replace', index=False)
611
- # write reform table
612
- outdf = self.dump_output(
613
- self.calc, dump_varset, mtr_inctax, mtr_paytax
614
- )
615
- assert len(outdf.index) == self.calc.array_len
616
- outdf.to_sql('reform', dbcon, if_exists='replace', index=False)
617
- dbcon.close()
618
- del outdf
619
- gc.collect()
620
534
 
621
535
  def write_tables_file(self):
622
536
  """
623
537
  Write tables to text file.
624
538
  """
625
539
  # pylint: disable=too-many-locals
626
- tab_fname = self._output_filename.replace('.csv', '-tab.text')
540
+ tab_fname = self.output_filename.replace('.xxx', '-tables.text')
627
541
  # skip tables if there are not some positive weights
628
- if self.calc_base.total_weight() <= 0.:
542
+ if self.calc_bas.total_weight() <= 0.:
629
543
  with open(tab_fname, 'w', encoding='utf-8') as tfile:
630
544
  msg = 'No tables because sum of weights is not positive\n'
631
545
  tfile.write(msg)
@@ -634,16 +548,16 @@ class TaxCalcIO():
634
548
  # - weights don't change with reform
635
549
  # - expanded_income may change, so always use baseline expanded income
636
550
  nontax_vars = ['s006', 'expanded_income']
637
- nontax = [self.calc_base.array(var) for var in nontax_vars]
551
+ nontax = [self.calc_bas.array(var) for var in nontax_vars]
638
552
  # create list of results for tax variables from reform Calculator
639
553
  tax_vars = ['iitax', 'payrolltax', 'lumpsum_tax', 'combined']
640
- reform = [self.calc.array(var) for var in tax_vars]
554
+ reform = [self.calc_ref.array(var) for var in tax_vars]
641
555
  # create DataFrame with tax distribution under reform
642
556
  dist = nontax + reform # using expanded_income under baseline policy
643
557
  all_vars = nontax_vars + tax_vars
644
558
  distdf = pd.DataFrame(data=np.column_stack(dist), columns=all_vars)
645
559
  # create DataFrame with tax differences (reform - baseline)
646
- base = [self.calc_base.array(var) for var in tax_vars]
560
+ base = [self.calc_bas.array(var) for var in tax_vars]
647
561
  change = [(reform[idx] - base[idx]) for idx in range(0, len(tax_vars))]
648
562
  diff = nontax + change # using expanded_income under baseline policy
649
563
  diffdf = pd.DataFrame(data=np.column_stack(diff), columns=all_vars)
@@ -656,6 +570,10 @@ class TaxCalcIO():
656
570
  del distdf
657
571
  del diffdf
658
572
  gc.collect()
573
+ if not self.silent:
574
+ print( # pragma: no cover
575
+ f'Write tabular output to file {tab_fname}'
576
+ )
659
577
 
660
578
  @staticmethod
661
579
  def write_decile_table(dfx, tfile, tkind='Totals'):
@@ -737,32 +655,32 @@ class TaxCalcIO():
737
655
  Write graphs to HTML files.
738
656
  All graphs contain same number of filing units in each quantile.
739
657
  """
740
- pos_wght_sum = self.calc.total_weight() > 0.0
658
+ pos_wght_sum = self.calc_ref.total_weight() > 0.0
741
659
  fig = None
742
660
  # percentage-aftertax-income-change graph
743
- pch_fname = self._output_filename.replace('.csv', '-pch.html')
661
+ pch_fname = self.output_filename.replace('.xxx', '-pch.html')
744
662
  pch_title = 'PCH by Income Percentile'
745
663
  if pos_wght_sum:
746
- fig = self.calc_base.pch_graph(self.calc, pop_quantiles=False)
664
+ fig = self.calc_bas.pch_graph(self.calc_ref, pop_quantiles=False)
747
665
  write_graph_file(fig, pch_fname, pch_title)
748
666
  else:
749
667
  reason = 'No graph because sum of weights is not positive'
750
668
  TaxCalcIO.write_empty_graph_file(pch_fname, pch_title, reason)
751
669
  # average-tax-rate graph
752
- atr_fname = self._output_filename.replace('.csv', '-atr.html')
670
+ atr_fname = self.output_filename.replace('.xxx', '-atr.html')
753
671
  atr_title = 'ATR by Income Percentile'
754
672
  if pos_wght_sum:
755
- fig = self.calc_base.atr_graph(self.calc, pop_quantiles=False)
673
+ fig = self.calc_bas.atr_graph(self.calc_ref, pop_quantiles=False)
756
674
  write_graph_file(fig, atr_fname, atr_title)
757
675
  else:
758
676
  reason = 'No graph because sum of weights is not positive'
759
677
  TaxCalcIO.write_empty_graph_file(atr_fname, atr_title, reason)
760
678
  # marginal-tax-rate graph
761
- mtr_fname = self._output_filename.replace('.csv', '-mtr.html')
679
+ mtr_fname = self.output_filename.replace('.xxx', '-mtr.html')
762
680
  mtr_title = 'MTR by Income Percentile'
763
681
  if pos_wght_sum:
764
- fig = self.calc_base.mtr_graph(
765
- self.calc,
682
+ fig = self.calc_bas.mtr_graph(
683
+ self.calc_ref,
766
684
  alt_e00200p_text='Taxpayer Earnings',
767
685
  pop_quantiles=False
768
686
  )
@@ -773,6 +691,12 @@ class TaxCalcIO():
773
691
  if fig:
774
692
  del fig
775
693
  gc.collect()
694
+ if not self.silent:
695
+ print( # pragma: no cover
696
+ f'Write graphical output to file {pch_fname}\n'
697
+ f'Write graphical output to file {atr_fname}\n'
698
+ f'Write graphical output to file {mtr_fname}'
699
+ )
776
700
 
777
701
  @staticmethod
778
702
  def write_empty_graph_file(fname, title, reason):
@@ -788,45 +712,108 @@ class TaxCalcIO():
788
712
  with open(fname, 'w', encoding='utf-8') as gfile:
789
713
  gfile.write(txt)
790
714
 
791
- def minimal_output(self):
715
+ BASE_DUMPVARS = [
716
+ 'RECID',
717
+ 's006',
718
+ 'data_source',
719
+ 'XTOT',
720
+ 'MARS',
721
+ 'expanded_income',
722
+ ]
723
+ MINIMAL_DUMPVARS = [
724
+ 'RECID',
725
+ 'iitax',
726
+ ]
727
+ MTR_DUMPVARS = [
728
+ 'mtr_itax',
729
+ 'mtr_ptax',
730
+ ]
731
+
732
+ def dump_variables(self, dumpvars_str):
792
733
  """
793
- Extract minimal output and return it as Pandas DataFrame.
734
+ Return set of variable names extracted from dumpvars_str, plus
735
+ minimal baseline/reform variables even if not in dumpvars_str.
736
+ Also, builds self.errmsg if any specified variables are not valid.
794
737
  """
795
- varlist = ['RECID', 'YEAR', 'WEIGHT', 'INCTAX', 'LSTAX', 'PAYTAX']
796
- odict = {}
797
- scalc = self.calc
798
- odict['RECID'] = scalc.array('RECID') # id for tax filing unit
799
- odict['YEAR'] = self.tax_year() # tax calculation year
800
- odict['WEIGHT'] = scalc.array('s006') # sample weight
801
- odict['INCTAX'] = scalc.array('iitax') # federal income taxes
802
- odict['LSTAX'] = scalc.array('lumpsum_tax') # lump-sum tax
803
- odict['PAYTAX'] = scalc.array('payrolltax') # payroll taxes (ee+er)
804
- odf = pd.DataFrame(data=odict, columns=varlist)
805
- return odf
806
-
807
- def dump_output(self, calcx, dump_varset, mtr_inctax, mtr_paytax):
738
+ assert isinstance(dumpvars_str, str)
739
+ self.errmsg = ''
740
+ # change some common non-space delimiter characters into spaces
741
+ dumpvars_str = dumpvars_str.replace(',', ' ')
742
+ dumpvars_str = dumpvars_str.replace(';', ' ')
743
+ dumpvars_str = dumpvars_str.replace('|', ' ')
744
+ # split dumpvars_str into a set of dump variables
745
+ dumpvars = dumpvars_str.split()
746
+ # check that all dumpvars items are valid
747
+ recs_vinfo = Records(data=None) # contains records VARINFO only
748
+ valid_set = recs_vinfo.USABLE_READ_VARS | recs_vinfo.CALCULATED_VARS
749
+ for var in dumpvars:
750
+ if var not in valid_set and var not in TaxCalcIO.MTR_DUMPVARS:
751
+ msg = f'invalid variable name {var} in DUMPVARS file'
752
+ self.errmsg += f'ERROR: {msg}\n'
753
+ if self.errmsg:
754
+ return []
755
+ # construct variable list
756
+ dumpvars_list = TaxCalcIO.MINIMAL_DUMPVARS
757
+ for var in dumpvars:
758
+ if var not in dumpvars_list and var not in TaxCalcIO.BASE_DUMPVARS:
759
+ dumpvars_list.append(var)
760
+ return dumpvars_list
761
+
762
+ def write_dumpdb_file(
763
+ self,
764
+ dump_varlist,
765
+ mtr_ptax_ref, mtr_itax_ref,
766
+ mtr_ptax_bas, mtr_itax_bas,
767
+ ):
808
768
  """
809
- Extract dump output and return it as Pandas DataFrame.
769
+ Write dump output to SQLite database file.
810
770
  """
811
- recs_vinfo = Records(data=None) # contains only Records VARINFO
812
- if dump_varset is None:
813
- varset = recs_vinfo.USABLE_READ_VARS | recs_vinfo.CALCULATED_VARS
814
- else:
815
- varset = dump_varset
816
- # create and return dump output DataFrame
817
- odf = pd.DataFrame()
818
- for varname in varset:
819
- vardata = calcx.array(varname)
820
- if varname in recs_vinfo.INTEGER_VARS:
821
- odf[varname] = vardata
822
- else: # specify precision that can handle small TMD area weights
823
- odf[varname] = vardata.round(5)
824
- odf = odf.copy()
825
- # specify mtr values in percentage terms
826
- if 'mtr_inctax' in varset:
827
- odf['mtr_inctax'] = (mtr_inctax * 100).round(2)
828
- if 'mtr_paytax' in varset:
829
- odf['mtr_paytax'] = (mtr_paytax * 100).round(2)
830
- # specify tax calculation year
831
- odf['FLPDYR'] = self.tax_year()
832
- return odf
771
+ # pylint: disable=too-many-arguments,too-many-positional-arguments
772
+ def dump_output(calcx, dumpvars, mtr_itax, mtr_ptax):
773
+ """
774
+ Extract dump output from calcx and return it as Pandas DataFrame.
775
+ """
776
+ odf = pd.DataFrame()
777
+ for var in dumpvars:
778
+ if var in TaxCalcIO.MTR_DUMPVARS:
779
+ if var == 'mtr_itax':
780
+ odf[var] = mtr_itax
781
+ elif var == 'mtr_ptax':
782
+ odf[var] = mtr_ptax
783
+ else:
784
+ odf[var] = calcx.array(var)
785
+ return odf
786
+ # begin main logic
787
+ assert isinstance(dump_varlist, list)
788
+ assert len(dump_varlist) > 0
789
+ db_fname = self.output_filename.replace('.xxx', '.db')
790
+ dbcon = sqlite3.connect(db_fname)
791
+ # write base table
792
+ outdf = pd.DataFrame()
793
+ for var in TaxCalcIO.BASE_DUMPVARS:
794
+ outdf[var] = self.calc_bas.array(var)
795
+ outdf['income_group'] = 0
796
+ assert len(outdf.index) == self.calc_bas.array_len
797
+ outdf.to_sql('base', dbcon, index=False)
798
+ del outdf
799
+ # write baseline table
800
+ outdf = dump_output(
801
+ self.calc_bas, dump_varlist, mtr_itax_bas, mtr_ptax_bas,
802
+ )
803
+ assert len(outdf.index) == self.calc_bas.array_len
804
+ outdf.to_sql('baseline', dbcon, index=False)
805
+ del outdf
806
+ # write reform table
807
+ outdf = dump_output(
808
+ self.calc_ref, dump_varlist, mtr_itax_ref, mtr_ptax_ref,
809
+ )
810
+ assert len(outdf.index) == self.calc_ref.array_len
811
+ outdf.to_sql('reform', dbcon, index=False)
812
+ del outdf
813
+ dbcon.close()
814
+ del dbcon
815
+ gc.collect()
816
+ if not self.silent:
817
+ print( # pragma: no cover
818
+ f'Write dump output to sqlite3 database file {db_fname}'
819
+ )