wipo-gbd-transformation 1.1.54__py3-none-any.whl → 1.1.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of wipo-gbd-transformation might be problematic.

Files changed (24)
  1. gbdtransformation/brands/chtm/filters.py +159 -181
  2. gbdtransformation/brands/chtm/schema +79 -91
  3. gbdtransformation/brands/chtm/template.yml +98 -107
  4. gbdtransformation/brands/natm/__init__.py +2 -3
  5. gbdtransformation/brands/natm/filters.py +8 -2
  6. gbdtransformation/brands/notm/__init__.py +5 -0
  7. gbdtransformation/brands/notm/filters.py +98 -0
  8. gbdtransformation/brands/notm/template.yml +165 -0
  9. gbdtransformation/brands/phtm/schema +79 -50
  10. gbdtransformation/brands/phtm/schema.classic +50 -0
  11. gbdtransformation/brands/phtm/template.classic.yml +102 -0
  12. gbdtransformation/brands/phtm/template.yml +1 -102
  13. gbdtransformation/brands/xxxx/template.yml +1 -1
  14. gbdtransformation/designs/woid/filters.py +11 -2
  15. gbdtransformation/execs-nico.py +709 -0
  16. gbdtransformation/execs.py +9 -4
  17. gbdtransformation/gbd-transform.exec.tgz +0 -0
  18. {wipo_gbd_transformation-1.1.54.dist-info → wipo_gbd_transformation-1.1.56.dist-info}/METADATA +3 -1
  19. {wipo_gbd_transformation-1.1.54.dist-info → wipo_gbd_transformation-1.1.56.dist-info}/RECORD +24 -17
  20. {wipo_gbd_transformation-1.1.54.dist-info → wipo_gbd_transformation-1.1.56.dist-info}/LICENSE.md +0 -0
  21. {wipo_gbd_transformation-1.1.54.dist-info → wipo_gbd_transformation-1.1.56.dist-info}/SOURCES_Stefans-Mac-Studio.local_Sep-18-063455-2024_Conflict.txt +0 -0
  22. {wipo_gbd_transformation-1.1.54.dist-info → wipo_gbd_transformation-1.1.56.dist-info}/WHEEL +0 -0
  23. {wipo_gbd_transformation-1.1.54.dist-info → wipo_gbd_transformation-1.1.56.dist-info}/entry_points.txt +0 -0
  24. {wipo_gbd_transformation-1.1.54.dist-info → wipo_gbd_transformation-1.1.56.dist-info}/top_level.txt +0 -0
gbdtransformation/execs-nico.py (new file)
@@ -0,0 +1,709 @@
+ import sys
+ import time
+ import argparse
+ import random
+ import os
+ import traceback
+ import difflib
+ import gzip
+ import multiprocessing
+ import xml.etree.ElementTree as ET
+ import concurrent.futures
+ import pprint
+
+ from tabulate import tabulate
+ from gbdtransformation.parser import Parser
+
+
+ def build_command_parser(options, doc):
+     """Argparse builder
+     @param options: the list of config option dicts
+     @param doc: the help text for the command
+     @return: parsed args"""
+     parser = argparse.ArgumentParser(description=doc,
+                                      formatter_class=argparse.RawTextHelpFormatter)
+     for config in options:
+         name = config.pop('name')
+         parser.add_argument(*name, **config)
+     return parser.parse_args()
+
+ parsers = {}
+
+
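For reference, `build_command_parser` expects each entry in `options` to be a dict whose `name` key lists the argparse names/flags and whose remaining keys are forwarded verbatim to `add_argument`, exactly as the `configs` lists in `test()`, `shazam()` and `run()` below do. A minimal standalone sketch (the `--limit` option is hypothetical, for illustration only):

    import argparse

    def build_command_parser(options, doc):
        parser = argparse.ArgumentParser(description=doc,
                                         formatter_class=argparse.RawTextHelpFormatter)
        for config in options:
            name = config.pop('name')             # names/flags for add_argument
            parser.add_argument(*name, **config)  # remaining keys forwarded as-is
        return parser.parse_args()

    # hypothetical option, for illustration only
    args = build_command_parser([{
        'name': ['--limit'],
        'dest': 'limit',
        'type': int,
        'help': 'maximum number of files to process',
        'default': 0,
    }], 'demo command')
    print(args.limit)  # 0 when run without arguments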
+ class bcolors:
+     HEADER = '\033[95m'
+     OKBLUE = '\033[94m'
+     OKGREEN = '\033[92m'
+     INFO = '\033[92m'
+     WARNING = '\033[93m'
+     FAIL = '\033[91m'
+     CRITICAL = '\033[91m'
+     ERROR = '\033[91m'
+     ENDC = '\033[0m'
+     BOLD = '\033[1m'
+     UNDERLINE = '\033[4m'
+
+ class progress:
+     def __init__(self, total):
+         self.total = total
+         self.done = 0
+
+     def start(self):
+         printProgressBar(0, self.total,
+                          prefix='Progress:', suffix='Complete', length=50)
+
+     def advance(self, value):
+         self.done = value
+         printProgressBar(self.done, self.total,
+                          prefix='Progress:', suffix='Complete', length=50)
+
+     def advance_with_step(self, value):
+         self.done += value
+         printProgressBar(self.done, self.total,
+                          prefix='Progress:', suffix='Complete', length=50)
+
+
+ def test():
+     doc = """
+     Runs regression tests
+     """
+     configs = [{
+         'name': ['--junit'],
+         'dest': 'junit',
+         'help': 'saves in junit format',
+         'action': 'store_true',
+         'default': False
+     }]
+     args = build_command_parser(configs, doc)
+     pkg_folder = os.path.dirname(__file__)
+     test_to_run = []
+     for type in ['brands', 'designs']:
+         path = os.path.join(pkg_folder, type)
+         for root, dirs, files in os.walk(path):
+             if 'tests' in dirs:
+                 template = os.path.basename(root)
+                 for file in os.listdir(os.path.join(root, 'tests')):
+                     if file.startswith('_'):
+                         continue
+                     if file.endswith('.out'):
+                         continue
+                     filename, ext = os.path.splitext(file)
+                     input_file_path = os.path.join(root, 'tests', file)
+                     out_file_path = input_file_path.replace(ext, '.out')
+                     has_output = os.path.exists(out_file_path)
+                     test_to_run.append({
+                         'template': template,
+                         'path': input_file_path,
+                         'test_output': has_output,
+                         'invalid_output': None
+                     })
+     for test in test_to_run:
+         res, exceptions, error = _run_per_file(
+             test['template'], test['path'])
+         test['execution'] = res
+         test['errors'] = exceptions
+         filename, ext = os.path.splitext(test['path'])
+         if test['test_output']:
+             expected = ''
+             with open(test['path'].replace(ext, '.out'), 'r') as f:
+                 expected = [e.replace('\n', '') for e in f.readlines()]
+             # guard against a failed run: res is None when an exception occurred
+             delta = difflib.ndiff(expected, (res or '').split('\n'))
+             to_outup_diffs = []
+             for d in delta:
+                 if d[0] != ' ':
+                     to_outup_diffs.append(d)
+                 else:
+                     if to_outup_diffs:
+                         break
+                     to_outup_diffs = []
+             test['invalid_output'] = '\n'.join(to_outup_diffs)
+     display = [
+         ['Nb.', 'Template', 'Input', 'Has run?', 'Errors', 'Valid output']
+     ]
+     counter = 0
+     if args.junit:
+         total = 0
+         errors = 0
+         fail = 0
+         tests_run_xml = []
+         for test in test_to_run:
+             if test['errors']:
+                 tmp = '''<failure type="Conversion error">
+ %s
+ </failure>''' % test['errors']
+             elif test['test_output'] and test['invalid_output']:
+                 tmp = '''<failure type="Invalid output">
+ %s
+ </failure>''' % test['invalid_output']
+             else:
+                 tmp = ''
+             current = '''
+ <testcase classname="%s" name="%s" time="0.001">
+ %s
+ </testcase>''' % (test['path'], test['template'], tmp)
+             total += 1
+             if test['errors']:
+                 errors += 1
+             elif test['test_output']:
+                 if test['invalid_output']:
+                     fail += 1
+             tests_run_xml.append(current)
+         payload = '\n'.join(tests_run_xml)
+         xml = '''<?xml version="1.0" encoding="UTF-8"?>
+ <testsuite name="integration" tests="%s" errors="%s" failures="%s" skip="0">
+ %s
+ </testsuite>''' % (total, errors, fail, payload)
+         with open('tests.xml', 'w') as f:
+             f.write(xml)
+     for test in test_to_run:
+         counter += 1
+         has_run = u'\u2713'
+         color = ''
+         end_color = ''
+         valid_output = "No output to test"
+         if test['test_output']:
+             valid_output = u'\u2713'
+             if test['invalid_output']:
+                 valid_output = test['invalid_output']
+                 color = bcolors.WARNING
+                 end_color = bcolors.ENDC
+         if test['errors']:
+             valid_output = u'\u2717'
+             has_run = u'\u2717'
+             color = bcolors.FAIL
+             end_color = bcolors.ENDC
+             test['errors'] = '\n'.join(['%s%s%s' % (color, e, end_color)
+                                         for e in test['errors'].split('\n')])
+         display.append([
+             '%s%s' % (color, counter), test['template'], os.path.basename(test['path']),
+             has_run, test['errors'], '%s%s%s' % (color, valid_output, end_color)])
+     print(tabulate(display[1:], headers=display[0]))
+
+
+ def _run_per_file(template, path, input_string=None, validate=False):
+     parser = Parser(template)
+     if input_string:
+         data = input_string
+     else:
+         data = path
+     try:
+         transformed = parser.run(data, raise_errors=True)
+         if validate:
+             transformed, errors = parser.validate(transformed, gbd_format=transformed)
+             return (transformed, None, errors)
+         return (transformed, None, None)
+     except Exception as e:
+         return (None, traceback.format_exc(), None)
+
+
+ def printProgressBar(iteration, total, prefix='', suffix='',
+                      decimals=1, length=100, fill='█', printEnd="\r"):
+     """
+     Call in a loop to create a terminal progress bar
+     @params:
+         iteration - Required : current iteration (Int)
+         total     - Required : total iterations (Int)
+         prefix    - Optional : prefix string (Str)
+         suffix    - Optional : suffix string (Str)
+         decimals  - Optional : positive number of decimals in percent complete (Int)
+         length    - Optional : character length of bar (Int)
+         fill      - Optional : bar fill character (Str)
+         printEnd  - Optional : end character (e.g. "\r", "\r\n") (Str)
+     """
+     return  # progress display is currently disabled
+     # percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
+     # filledLength = int(length * iteration // total)
+     # bar = fill * filledLength + '-' * (length - filledLength)
+     # print(f'\r{prefix} |{bar}| {percent}% {suffix}', end=printEnd)
+     # Print New Line on Complete
+     # if iteration == total:
+     #     print()
+
+ def do_transform(file, templates, validate=False):
+     raw_data = __read_file(file)
+
+     for template in templates.split(','):
+         parser = parsers.get(template)
+         ret = {'src': file}
+         ret['fmt'] = 'gbd'
+         # get return from transformation
+         if template == 'solrjtm':
+             ret['fmt'] = 'idx'
+             try:
+                 transformed = parser.run(raw_data, raise_errors=True)
+                 ret['out'] = transformed
+             except Exception as e:
+                 ret['terror'] = {'message': e, 'stacktrace': traceback.format_exc()}
+         else:
+             if not validate or template == 'solrjtm':
+                 try:
+                     transformed = parser.run(raw_data, raise_errors=True)
+                     ret['out'] = transformed
+                     raw_data = transformed
+                 except Exception as e:
+                     ret['terror'] = {'message': e, 'stacktrace': traceback.format_exc()}
+             # get return from transformation and validation
+             else:
+                 try:
+                     transformed, errors = parser.validate(raw_data)
+                     ret['out'] = transformed
+                     ret['verrors'] = errors
+                     raw_data = transformed
+                 except Exception as e:
+                     ret['terror'] = {'message': e, 'stacktrace': traceback.format_exc()}
+     return ret
+
+
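Each result of `do_transform` is a plain dict: `src` is the input file, `fmt` is `gbd` (or `idx` for the `solrjtm` template), `out` holds the transformed payload, `terror` a transformation failure, and `verrors` any validation findings. A sketch of consuming one result, assuming the template's parser has been registered in the module-level `parsers` dict first (as `do_multiprocess` does); the path and template name below are placeholders:

    parsers['chtm'] = Parser('chtm')
    result = do_transform('/data/office/archive/doc-0001.xml.gz', 'chtm', validate=True)
    if result.get('terror'):
        print('transformation failed for', result['src'])
        print(result['terror']['stacktrace'])
    else:
        for verror in result.get('verrors', []):
            print(verror['severity'], verror['field'], verror['type'])
        print(result['out'])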
+ def _sub_arry_offset(max_paralel, length, offset):
+     if offset + max_paralel < length:
+         return offset + max_paralel
+     return length
+
+
+ def _paralel_process(path, xpath_lines):
+
+     max_parallel = 25
+     # Collect the files to analyse. Each file is read in a separate thread;
+     # whenever a batch completes we schedule the next files, keeping at most
+     # max_parallel tasks in flight.
+     tasks_to_do = []
+     for root, dirs, files in os.walk(path):
+         for f in files:
+             # TODO: match file name with regex
+             if f.endswith('.xml.gz'):
+                 file2process = os.path.join(path, root, f)
+                 tasks_to_do.append(file2process)
+     pbar = progress(len(tasks_to_do))
+
+     task_counter = 0
+
+     with concurrent.futures.ThreadPoolExecutor() as executor:
+
+         # Schedule the initial batch of futures; _sub_arry_offset clamps the
+         # slice to the end of the list when fewer than max_parallel files remain.
+         futures = {
+             executor.submit(_analyse_for_shazam, file2process, xpath_lines): file2process
+             for file2process in tasks_to_do[task_counter:_sub_arry_offset(max_parallel,
+                                                                           len(tasks_to_do),
+                                                                           task_counter)]
+         }
+         pbar.start()
+         task_counter = len(futures)
+         while futures:
+             # Wait for the first future to complete.
+             done, _ = concurrent.futures.wait(
+                 futures, return_when=concurrent.futures.FIRST_COMPLETED
+             )
+             pbar.advance_with_step(len(done))
+             for fut in done:
+                 res = fut.result()
+                 file2process = futures.pop(fut)
+                 yield xpath_lines
+
+             # Schedule the next batch: one new task per completed one. Once the
+             # list is exhausted the slice below simply comes back empty.
+             for file2process in tasks_to_do[task_counter:_sub_arry_offset(len(done),
+                                                                           len(tasks_to_do),
+                                                                           task_counter)]:
+                 task_counter += 1
+                 futures[executor.submit(_analyse_for_shazam, file2process, xpath_lines)] = file2process
+
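`_paralel_process` (and `_paralel_run` below) follow one scheduling shape: submit an initial window of `max_parallel` futures, wait for the first completions, yield, then top the window back up with one new task per completed one. A minimal self-contained sketch of just that pattern, decoupled from the package (`bounded_map` is a made-up name, not part of the API):

    import concurrent.futures
    import itertools

    def bounded_map(func, tasks, window=25):
        # yields func(task) results, keeping at most `window` tasks in flight
        with concurrent.futures.ThreadPoolExecutor() as executor:
            it = iter(tasks)
            futures = {executor.submit(func, t) for t in itertools.islice(it, window)}
            while futures:
                done, futures = concurrent.futures.wait(
                    futures, return_when=concurrent.futures.FIRST_COMPLETED)
                for fut in done:
                    yield fut.result()
                # refill: one new submission per completed future
                futures |= {executor.submit(func, t)
                            for t in itertools.islice(it, len(done))}

    for word in bounded_map(str.upper, ['ab', 'cd', 'ef'], window=2):
        print(word)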
+ def _doc2xpath(el, path, lines, root=''):
+     lines.add(root + path)
+     path = root + path
+     # Record attributes
+     for name, val in el.items():
+         lines.add(path + "[@" + _removeNS(name) + "=" + val + "]")
+     # Counter on the sibling element names
+     counters = {}
+     # Loop on child elements
+     for childEl in el:
+         tag = _removeNS(childEl.tag)
+         # Tag name already encountered?
+         if tag in counters:
+             continue
+         counters[tag] = 1
+         # Record child node recursively
+         _doc2xpath(childEl, '/' + tag, lines, root=path)
+
+ def _removeNS(tag):
+     if tag.find('}') == -1:
+         return tag
+     else:
+         return tag.split('}', 1)[1]
+
+ def _analyse_for_shazam(file2process, xpath_lines):
+     stream = __read_file(file2process)
+     tree = ET.ElementTree(ET.fromstring(stream))
+     troot = tree.getroot()
+     _doc2xpath(troot, _removeNS(troot.tag), xpath_lines)
+
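To illustrate what `_doc2xpath` accumulates: one entry per distinct element path, plus one per attribute, with repeated sibling tags recorded only once. The trademark-flavoured XML here is invented for the example:

    import xml.etree.ElementTree as ET

    lines = set()
    root = ET.fromstring('<TradeMark st13="CH123"><WordMark><Text>ACME</Text></WordMark></TradeMark>')
    _doc2xpath(root, _removeNS(root.tag), lines)
    # lines == {'TradeMark',
    #           'TradeMark[@st13=CH123]',
    #           'TradeMark/WordMark',
    #           'TradeMark/WordMark/Text'}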
+ def shazam():
+     doc = """
+     deduce xpath lines from a directory of xml files
+     """
+     configs = [{
+         'name': ['path'],
+         'type': str,
+         'help': 'path to a file or a directory'
+     }, {
+         'name': ['-o'],
+         'dest': 'outfile',
+         'help': 'write output to a file',
+         'type': str,
+         'default': None,
+     }, ]
+
+     args = build_command_parser(configs, doc)
+     path = args.path
+
+     if os.path.isfile(path):
+         print('Expected a directory location.')
+         sys.exit(1)
+     # a set to contain the unique xpath lines
+     xpath_lines = set()
+
+     # in case the path passed is relative
+     if not os.path.isabs(path):
+         path = os.path.realpath(os.path.join(os.getcwd(), path))
+     # passed a directory
+     current_xplath_lines = None
+     for tmp in _paralel_process(path, xpath_lines):
+         current_xplath_lines = tmp
+
+     # guard against an empty directory, where the generator yields nothing
+     if current_xplath_lines is not None:
+         xpath_lines = current_xplath_lines
+     if args.outfile:
+         with open(args.outfile, 'w') as fh:
+             for line in sorted(xpath_lines):
+                 xpath = line.split('/')
+                 leaf = xpath.pop()
+                 fh.write(''.join(['__' for p in xpath]) + '/' + leaf)
+                 fh.write('\n')
+     else:
+         pprint.pprint(xpath_lines)
+
+
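With `-o`, each sorted xpath line is written with two underscores per ancestor level, so the output file reads as a depth-indented list of leaf names. Continuing the invented `TradeMark` example above:

    line = 'TradeMark/WordMark/Text'
    xpath = line.split('/')   # ['TradeMark', 'WordMark', 'Text']
    leaf = xpath.pop()        # 'Text'; xpath now holds the two ancestors
    print(''.join(['__' for p in xpath]) + '/' + leaf)   # prints: ____/Text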
+ def _paralel_run(tasks_to_do, templates, pbar, validate=False, max_parallel=25):
+     # Transform each file in a separate thread, keeping at most max_parallel
+     # tasks in flight; whenever a batch completes, schedule the next files.
+
+     task_counter = 0
+
+     with concurrent.futures.ThreadPoolExecutor() as executor:
+
+         # Schedule the initial batch of futures; _sub_arry_offset clamps the
+         # slice to the end of the list when fewer than max_parallel files remain.
+         futures = {
+             executor.submit(do_transform, file2process, templates, validate): file2process
+             for file2process in tasks_to_do[task_counter:_sub_arry_offset(max_parallel,
+                                                                           len(tasks_to_do),
+                                                                           task_counter)]
+         }
+         task_counter = len(futures)
+         while futures:
+             # Wait for the first future to complete.
+             processed, _ = concurrent.futures.wait(
+                 futures, return_when=concurrent.futures.FIRST_COMPLETED
+             )
+             pbar.advance_with_step(len(processed))
+             for fut in processed:
+                 res = fut.result()
+                 file2process = futures.pop(fut)
+                 yield res
+
+             # Schedule the next batch: one new task per completed one. Once the
+             # list is exhausted the slice below simply comes back empty.
+             for file2process in tasks_to_do[task_counter:_sub_arry_offset(len(processed),
+                                                                           len(tasks_to_do),
+                                                                           task_counter)]:
+                 task_counter += 1
+                 futures[executor.submit(do_transform, file2process, templates, validate)] = file2process
+
+
+ def do_multiprocess(files, settings):
+     (args, pbar, done) = settings
+
+     results = []
+     # create parsers
+     for template in args.template.split(','):
+         parsers[template] = Parser(template)
+
+     for file in files:
+         results.append(do_transform(file, args.template, validate=args.validate))
+         done.value += 1
+         pbar.advance(done.value)
+     # for tmp in _paralel_run(files, args.template, pbar, validate=args.validate,
+     #                         max_parallel=args.threads):
+     #     results.append(tmp)
+     return results
+
+
+ def run():
+     doc = """
+     transform input to output using a defined template name.
+     """
+     configs = [{
+         'name': ['path'],
+         'type': str,
+         'help': 'path to a file or a directory'
+     }, {
+         'name': ['template'],
+         'type': str,
+         'help': 'the template used for transformation'
+     }, {
+         'name': ['-t'],
+         'dest': 'top',
+         'type': int,
+         'help': 'number of files to run the command onto',
+         'default': 0
+     }, {
+         'name': ['-r'],
+         'dest': 'random',
+         'type': int,
+         'help': 'number of *random* files to run the command onto',
+         'default': 0
+     }, {
+         'name': ['-w'],
+         'dest': 'workers',
+         'type': int,
+         'help': 'number of workers to run the command',
+         'default': 1
+     }, {
+         'name': ['-th'],
+         'dest': 'threads',
+         'type': int,
+         'help': 'number of threads to run the command',
+         'default': 25
+     }, {
+         'name': ['-o'],
+         'dest': 'outfile',
+         'help': 'write output to a file',
+         'type': str,
+         'default': None,
+     }, {
+         'name': ['-a'],
+         'dest': 'appendfile',
+         'help': 'append output to a file',
+         'type': str,
+         'default': None,
+     }, {
+         'name': ['--qc'],
+         'dest': 'validate',
+         'help': 'runs gbd-validate on output',
+         'action': 'store_true',
+         'default': False
+     }, {
+         'name': ['-q', '--quiet'],
+         'dest': 'quiet',
+         'help': 'perform transformation quietly (do not print result of transformation)',
+         'action': 'store_true',
+         'default': False
+     }, ]
+     args = build_command_parser(configs, doc)
+
+     def _walk_dir(root_path, nb):
+         buffer = []
+         for root, dirs, files in os.walk(root_path):
+             for f in files:
+                 if f.endswith('.xml.gz'):  # or f.endswith('.xml'):
+                     buffer.append(os.path.join(root_path, root, f))
+                     if len(buffer) == nb:
+                         return buffer
+         return buffer
+
+     def _fish_dir(root_path, nb):
+         buffer = []
+         path = root_path
+         # go fishing
+         while len(buffer) < nb:
+             sea = os.listdir(path)
+             # skip empty directories
+             if not len(sea):
+                 path = root_path
+                 continue
+             fish = os.path.join(path, random.choice(sea))
+             if os.path.isdir(fish):
+                 path = fish
+             elif os.path.isfile(fish) and fish.endswith('.xml.gz'):
+                 buffer.append(fish)
+                 path = root_path
+         return buffer
+
+     path = args.path
+     # in case the path passed is relative
+     if not os.path.isabs(path):
+         path = os.path.realpath(os.path.join(os.getcwd(), path))
+
+     files = []
+     # passed a file
+     if os.path.isfile(path):
+         files.append(path)
+     # passed a directory
+     elif os.path.isdir(path):
+         if args.random:
+             files = _fish_dir(path, args.random)
+         else:
+             files = _walk_dir(path, args.top)
+     else:
+         raise Exception('invalid path %s. try again.' % path)
+
+     # guard against a non-positive pool size on machines with few cores
+     workers = max(1, min(multiprocessing.cpu_count() - 4, args.workers))
+
+     # print('Running template [%s] * [%s files] with [%s workers]' % (args.template,
+     #                                                                 len(files), workers))
+     files_per_worker_len = len(files) / workers
+
+     files_per_worker = []
+     tmp = []
+     for el in files:
+         if len(tmp) >= files_per_worker_len:
+             files_per_worker.append(tmp)
+             tmp = []
+         tmp.append(el)
+     files_per_worker.append(tmp)
+
+     pbar = progress(len(files))
+     pbar.start()
+
+     # a way to share state among workers
+     mpmanager = multiprocessing.Manager()
+     done = mpmanager.Value('i', 0)
+
+     with multiprocessing.Pool(processes=workers) as pool:  # auto closing workers
+         raw_results = pool.starmap(do_multiprocess, zip(files_per_worker, [(args, pbar, done) for x in files]))
+         results = []
+         for result in raw_results:
+             results.extend(result)
+
+     _print_transformation_out(results, args)
+     _print_transformation_err(results, args)
+     _print_validation_err(results, args)
+
+
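The `files_per_worker` loop in `run()` splits the file list into roughly equal chunks: a chunk is closed whenever it reaches `len(files) / workers` entries. A worked example with 10 files and 3 workers:

    files = list(range(10))
    workers = 3
    files_per_worker_len = len(files) / workers   # 3.33...

    files_per_worker = []
    tmp = []
    for el in files:
        if len(tmp) >= files_per_worker_len:
            files_per_worker.append(tmp)
            tmp = []
        tmp.append(el)
    files_per_worker.append(tmp)
    print(files_per_worker)   # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]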
+ def _print_transformation_out(results, args):
+     output_storage = args.outfile or args.appendfile or '/dev/null'
+     output_mode = 'a' if args.appendfile else 'w'
+     # fh = open(output_storage, output_mode)
+
+     dirFiles = {}
+     for r in results:
+         if r.get('out', None):
+             dir = os.path.dirname(r.get('src'))
+             parentDir = os.path.dirname(dir)
+             # print("dir: ", dir, " parent ", parentDir)
+             destFile = os.path.join(parentDir, r.get('fmt') + ".json")
+             if parentDir not in dirFiles:
+                 dirFiles[parentDir] = destFile
+     # no support for append
+     dirHandles = {}
+     for dir in dirFiles:
+         dirHandles[dir] = open(dirFiles[dir], 'w')
+         print("Creating this file: ", dirFiles[dir])
+         dirHandles[dir].write("[\n")
+
+     for result in results:
+         if result.get('out', None):
+             if not args.quiet:
+                 print(result['out'])
+             childDir = os.path.dirname(result.get('src'))
+             dir = os.path.dirname(childDir)
+             dirHandles[dir].write(result['out'])
+             dirHandles[dir].write(",\n")
+             # fh.write(result['out'])
+             # fh.write('\n')
+     for dh in dirHandles.values():
+         dh.write("{}]\n")
+         dh.close()
+
+     # fh.close()
+
+ def _print_validation_err(results, args):
+     if not args.validate:
+         return
+
+     display_lines = []
+
+     for result in results:
+         verrors = result.get('verrors', [])
+         if not len(verrors):
+             continue
+
+         display_line = {}
+         display_line['QC Invalid File'] = __format_color(result['src'], bcolors.FAIL)
+         display_line['Severity'] = []
+         display_line['Field'] = []
+         display_line['Message'] = []
+
+         for i, verror in enumerate(verrors):
+             severity = __format_color(verror['severity'], getattr(bcolors, verror['severity']))
+             field = verror['field']
+             message = verror['type']
+
+             display_line['Severity'].append(severity)
+             display_line['Field'].append(field)
+             display_line['Message'].append(message)
+
+         display_line['Severity'] = '\n'.join(display_line['Severity'])
+         display_line['Field'] = '\n'.join(display_line['Field'])
+         display_line['Message'] = '\n'.join(display_line['Message'])
+         display_lines.append(display_line)
+
+     if len(display_lines):
+         print('\n')
+         print(tabulate(display_lines, headers='keys', showindex='always', tablefmt='psql'))
+
+ def _print_transformation_err(results, args):
+     # a single file
+     if len(results) == 1:
+         result = results[0]
+         if result.get('terror', None):
+             print(__format_color(result['terror']['stacktrace'], bcolors.FAIL))
+         return
+
+     # multi file
+     display_lines = []
+
+     for result in results:
+         if not result.get('terror', None):
+             continue
+
+         display_line = {}
+         display_line['Transformation Failed File'] = __format_color(result['src'], bcolors.FAIL)
+         display_line['Error Message'] = result['terror']['message']
+
+         display_lines.append(display_line)
+
+     if len(display_lines):
+         print('\n')
+         print(tabulate(display_lines, headers='keys', showindex='always', tablefmt='psql'))
+
+ def __format_color(value, color):
+     return '%s%s%s' % (color, value, bcolors.ENDC)
+
+ def __read_file(file):
+     if file.endswith('.xml.gz'):
+         with gzip.open(file, 'rb') as f:
+             raw_data = f.read()
+             return raw_data
+     else:
+         with open(file, 'r') as f:
+             raw_data = f.read()
+             return raw_data
+