texmark 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
texmark/__init__.py ADDED
File without changes
texmark/build.py ADDED
@@ -0,0 +1,178 @@
1
+ #!/usr/bin/env python3
2
+ import subprocess
3
+ from pathlib import Path
4
+ import os
5
+ import sys
6
+ import pypandoc
7
+ import json
8
+ import yaml
9
+ import jinja2
10
+ import frontmatter
11
+ import argparse
12
+ import texmark
13
+ import json
14
+ import panflute as pf
15
+ import io
16
+ from texmark.logs import logger
17
+
18
# Directory that contains the installed ``texmark`` package. Template paths
# used by build_tex are joined onto this directory.
rootpath = Path(texmark.__file__).resolve().parents[0]
19
+
20
def run(cmd, shell=False, check=True, **kwargs):
    """Echo a command to stdout, then execute it with ``subprocess.run``.

    Args:
        cmd: A command string (used with ``shell=True``) or a sequence of
            arguments. Sequence items may be ``str`` or path-like objects.
        shell: Forwarded to ``subprocess.run``; also selects how ``cmd`` is
            echoed (verbatim for a shell string, space-joined otherwise).
        check: Forwarded to ``subprocess.run``.
        **kwargs: Forwarded unchanged to ``subprocess.run`` (e.g. ``cwd``).

    Returns:
        The ``subprocess.CompletedProcess`` produced by ``subprocess.run``.

    Raises:
        subprocess.CalledProcessError: If ``check`` is true and the command
            exits with a non-zero status.
    """
    # Convert each argument with str() for the echo only: subprocess accepts
    # path-like arguments, but str.join raises TypeError on anything non-str.
    print(cmd if shell else ' '.join(str(part) for part in cmd))
    return subprocess.run(cmd, shell=shell, check=check, **kwargs)
23
+
24
+
25
def normalize_metadata(meta):
    """
    Recursively convert panflute metadata into plain JSON-serializable Python values.

    ``MetaInlines`` / ``MetaBlocks`` are flattened with ``pf.stringify``,
    ``MetaString`` yields its text, ``MetaBool`` yields its boolean value,
    ``MetaList`` becomes a list and ``MetaMap`` becomes a dict (both converted
    recursively). Any other value is returned unchanged.
    """
    if isinstance(meta, (pf.MetaInlines, pf.MetaBlocks)):
        return pf.stringify(meta)
    if isinstance(meta, pf.MetaString):
        return meta.text
    if isinstance(meta, pf.MetaBool):
        # Read the wrapped value: bool(meta) would test the wrapper object
        # itself, turning a metadata value of `false` into True.
        return meta.boolean
    if isinstance(meta, pf.MetaList):
        return [normalize_metadata(item) for item in meta]
    if isinstance(meta, pf.MetaMap):
        return {key: normalize_metadata(value) for key, value in meta.items()}
    # Primitive types (str, int, etc.) or unknown - return as is
    return meta
43
+
44
+
45
def build_tex(input_md, output_tex, template='', bib_file='', build_dir='build', filters=None, journal_template=None, filters_module=None):
    """Convert a Markdown manuscript into a LaTeX file.

    The Markdown front matter is merged with the explicit arguments, pandoc is
    run once (with filters) to obtain a JSON AST and the metadata produced by
    the filters, the AST is rendered to a LaTeX body, and the body plus
    metadata are rendered through a Jinja2 template into ``output_tex``.

    Args:
        input_md: Path of the Markdown source file (read as UTF-8).
        output_tex: Path of the LaTeX file to write; its parent directory is
            created if needed.
        template: Template path, joined onto the package directory. Falls back
            to the ``template`` front-matter field, then to
            ``templates/<journal_template>/template.tex``.
        bib_file: Bibliography file passed to pandoc via ``--bibliography``.
            Falls back to the ``bibliography`` front-matter field; when neither
            is set no ``--bibliography`` argument is passed.
        build_dir: Directory that is created (nothing is written to it here).
        filters: Extra pandoc filters run after ``texmark-filter``. Falls back
            to the ``filters`` front-matter field.
        journal_template: Template family; falls back to the front-matter
            ``journal.template`` field, then to ``"default"``. The resolved
            value is written back to ``metadata['journal']['template']``.
        filters_module: Optional module name stored under the
            ``filters_module`` metadata key.

    Returns:
        The metadata dict (front matter updated with the normalized metadata
        of the filtered document) plus a ``resource_path`` entry holding the
        template directory as a string.
    """
    # Step 0: Parse Markdown front matter. The context manager closes the
    # handle; the text is handed to pandoc, so it is read as UTF-8.
    with open(input_md, encoding="utf-8") as source:
        post = frontmatter.loads(source.read())
    metadata = post.metadata

    journal_template = (
        journal_template
        or metadata.get('journal', {}).get('template', 'default')
        or "default"
    )
    metadata.setdefault('journal', {})['template'] = journal_template

    if filters_module:
        metadata['filters_module'] = filters_module

    template = (
        template
        or metadata.get('template')
        or f'templates/{journal_template}/template.tex'
    )
    template_path = Path(template)
    template_name = template_path.name
    resource_path = rootpath / template_path.parent

    bib_file = bib_file or metadata.get('bibliography', None)
    # Always bind bib_args: documents without a bibliography previously hit a
    # NameError on the next statement.
    bib_args = ['--bibliography', bib_file] if bib_file else []
    args = bib_args + (metadata.get('pandoc_args') or []) + ["--natbib"]

    filters = ["texmark-filter"] + (filters or metadata.get('filters') or [])

    # Step 1: Run pandoc to get JSON AST with filters applied, and updated metadata
    cmd_json = []
    for filter_name in filters:
        cmd_json.extend(['--filter', filter_name])
    cmd_json.extend(args)

    post.metadata = metadata

    ast_json_str = pypandoc.convert_text(
        frontmatter.dumps(post),
        format="markdown+footnotes",
        to="json",
        extra_args=cmd_json,
    )

    doc = pf.load(io.StringIO(ast_json_str))
    metadata.update(normalize_metadata(doc.metadata))

    # Step 2: Load the Jinja2 template from the template directory
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(resource_path))
    tex_template = env.get_template(template_name)

    Path(build_dir).mkdir(parents=True, exist_ok=True)
    Path(output_tex).parent.mkdir(parents=True, exist_ok=True)

    # Step 3: Render AST to LaTeX (filters not needed again). The template
    # path is passed as a plain string like every other pandoc argument.
    body_template = str(rootpath / "templates" / "body.tex")
    body = pypandoc.convert_text(
        ast_json_str,
        format="json",
        to="latex",
        extra_args=['--template', body_template] + args,
    )

    with open(output_tex, "w", encoding="utf-8") as out:
        out.write(tex_template.render(body=body, **metadata))  # Includes authors/abstract

    metadata["resource_path"] = str(resource_path)
    return metadata
123
+
124
+
125
def compile_pdf(input_tex, output_pdf, engine='pdflatex', build_dir='build', images_dir='images', bib_file='references.bib', resource_path=''):
    """
    Step 2: Compile LaTeX source into PDF.

    Copies the template resources, the images directory, the LaTeX source and
    the bibliography into ``build_dir`` with ``rsync``, then runs the LaTeX
    engine once, ``bibtex`` once and the engine twice more inside
    ``build_dir``. The LaTeX and BibTeX runs use ``check=False``, so their
    non-zero exit codes do not raise. Finally the PDF is moved to
    ``output_pdf`` when that differs from the location the engine wrote to.

    Args:
        input_tex: Path of the LaTeX source file.
        output_pdf: Destination path of the final PDF.
        engine: LaTeX engine executable.
        build_dir: Directory in which the document is compiled.
        images_dir: Images directory copied into ``build_dir``; skipped when
            the path does not exist.
        bib_file: Bibliography file copied into ``build_dir``; skipped when
            empty or ``None``.
        resource_path: Optional directory whose contents are copied into
            ``build_dir`` first.

    Raises:
        subprocess.CalledProcessError: If an ``rsync`` or the final ``mv``
            command fails.
    """
    # Argument lists (no shell) keep paths with spaces or shell metacharacters
    # intact; every item is a str so run() can echo the command.
    build_target = f"{build_dir}/"

    if resource_path:
        print(f"Resource path: {resource_path}")
        run(["rsync", "-r", f"{resource_path}/", build_target])

    # A document without figures has no images directory to copy.
    if Path(images_dir).exists():
        run(["rsync", "-r", str(Path(images_dir)), build_target])
    run(["rsync", str(input_tex), build_target])
    # Callers pass None when the document declares no bibliography.
    if bib_file:
        run(["rsync", str(bib_file), build_target])

    tex_name = Path(input_tex).name
    cmd = [engine, '-interaction=nonstopmode', tex_name]
    bibcmd = ["bibtex", Path(input_tex).with_suffix(".aux").name]
    run(cmd, cwd=build_dir, check=False)
    run(bibcmd, cwd=build_dir, check=False)
    # Two further passes after bibtex.
    run(cmd, cwd=build_dir, check=False)
    run(cmd, cwd=build_dir, check=False)

    # Rename/move the generated PDF if needed
    actual_pdf = Path(build_dir) / Path(input_tex).with_suffix(".pdf").name
    if Path(output_pdf) != actual_pdf:
        run(['mv', str(actual_pdf), str(output_pdf)])
147
+
148
+
149
def main():
    """Command-line entry point: build LaTeX from Markdown and optionally a PDF.

    Parses the command line, calls ``build_tex`` and, when ``--pdf`` is given,
    ``compile_pdf``. Unless overridden with ``--tex`` / ``--output``, the
    LaTeX and PDF files are placed in the build directory and named after the
    input file.
    """
    parser = argparse.ArgumentParser(description='Two-step build: Markdown → LaTeX → PDF')
    parser.add_argument('input', help='Input markdown file')
    parser.add_argument('-j', '--journal-template', help='Pandoc LaTeX + filter template family. Update journal -> template yaml field)')
    parser.add_argument('-t', '--template', help='Pandoc LaTeX template. Update template yaml field)')
    parser.add_argument('-f', '--filters', nargs='*', help='Additional, custom filters. By default the pre-defined, custom filters for the journal are used via the `texmark-filter` utility.')
    parser.add_argument('--filters-module', help='Load a custom filter module. This is a Python module that may extend the filters dict defined in the `texmark.shared` module.')
    parser.add_argument('-o', '--output', help='Final PDF output filename')
    parser.add_argument('-e', '--engine', default='pdflatex', help='LaTeX engine (e.g. pdflatex, xelatex)')
    parser.add_argument('-d', '--build', default='build', help='build directory')
    parser.add_argument('--bib', help='bibliography file')
    parser.add_argument('--tex', help='LaTeX output filename')
    parser.add_argument('--pdf', action="store_true")
    parser.add_argument('--images', default='images', help='images directory')
    args = parser.parse_args()

    # Derive filenames
    build_dir = Path(args.build)
    tex_file = args.tex or build_dir / Path(args.input).with_suffix(".tex").name
    pdf_file = args.output or build_dir / Path(args.input).with_suffix(".pdf").name

    metadata = build_tex(
        args.input,
        tex_file,
        template=args.template,
        bib_file=args.bib,
        # Forward --build so both steps use the same build directory.
        build_dir=args.build,
        filters=args.filters,
        journal_template=args.journal_template,
        filters_module=args.filters_module,
    )

    if args.pdf:
        compile_pdf(
            tex_file,
            pdf_file,
            args.engine,
            args.build,
            args.images,
            # --bib takes precedence over the front-matter field, matching
            # the precedence build_tex applies for pandoc.
            bib_file=args.bib or metadata.get('bibliography'),
            resource_path=metadata.get('resource_path'),
        )


if __name__ == '__main__':
    main()
texmark/filters.py ADDED
@@ -0,0 +1,114 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import sys
4
+ import json
5
+ import importlib
6
+ import panflute as pf
7
+ from texmark.logs import logger
8
+ from texmark.shared import filters, default_filter
9
+ from texmark.shared import JournalFilter, filters, logger, Processor
10
+ from texmark.sectiontracker import SectionProcessor
11
+
12
# Section handling for the Copernicus template family: the listed sections are
# extracted from the document body, two of them are stored under a different
# metadata key, and two headers are replaced by dedicated LaTeX commands.
_copernicus_sections = SectionProcessor(
    extract_sections=[
        'abstract',
        'appendix',
        'acknowledgements',
        'author-contributions',
        'competing-interests',
    ],
    sections_map={
        'author-contributions': 'authorcontribution',
        'competing-interests': 'competinginterests',
    },
    remap_command_sections={
        'introduction': r'\introduction',
        'conclusions': r'\conclusions',
    },
)

copernicus_filter = JournalFilter(processors=[_copernicus_sections])

# The same filter is registered under each of these template names.
for journal in ("copernicus", "cp", "esd"):
    filters[journal] = [copernicus_filter]
29
+
30
+
31
def force_cite(elem, doc):
    r"""Replace a citation element with a raw LaTeX ``\cite{...}`` command.

    The ids of all citations in the element are joined with commas. Elements
    that are not ``pf.Cite`` yield ``None``.
    """
    if not isinstance(elem, pf.Cite):
        return None
    key_str = ",".join(citation.id for citation in elem.citations)
    # Emit the command as raw LaTeX
    return pf.RawInline('\\cite{' + key_str + '}', format='latex')
37
+
38
def header_to_unnumbered(elem, doc):
    r"""Replace a header with a raw, unnumbered LaTeX sectioning command.

    The command name is ``section*`` prefixed with one ``sub`` per header
    level above 1; the title is the stringified header. Elements that are
    not ``pf.Header`` yield ``None``.
    """
    if not isinstance(elem, pf.Header):
        return None
    prefix = "sub" * (elem.level - 1)
    title = pf.stringify(elem)
    return pf.RawBlock('\\' + prefix + 'section*{' + title + '}', format='latex')
45
+
46
def header_to_paragraph(elem, doc):
    r"""Replace a header with a raw LaTeX ``\paragraph*{Title.}`` command.

    The title is the stringified header followed by a period; the header
    level is not used. Elements that are not ``pf.Header`` yield ``None``.
    """
    if isinstance(elem, pf.Header):
        # Convert header to raw LaTeX \paragraph*{...} with a trailing period
        content = pf.stringify(elem)
        return pf.RawBlock('\\paragraph*{' + content + '.}', format='latex')
53
+
54
+
55
# Section handling for the "science" template: the listed sections are
# extracted from the body and several of them share a metadata key.
_science_sections = SectionProcessor(
    extract_sections=[
        'abstract',
        'appendix',
        'acknowledgements',
        'author-contributions',
        'competing-interests',
        'methods',
        'materials-and-methods',
        'supplementary-material',
    ],
    sections_map={
        'author-contributions': 'authorcontribution',
        'competing-interests': 'competinginterests',
        'supplementary-material': 'appendix',
        'methods': 'materialsandmethods',
        'materials-and-methods': 'materialsandmethods',
    },
    # No header is remapped to a LaTeX command for this template.
    remap_command_sections={},
)

# After section extraction, citations become raw \cite commands and headers
# become \paragraph* commands.
science_filter = JournalFilter(
    processors=[_science_sections, force_cite, header_to_paragraph],
)

filters['science'] = [science_filter]
76
+
77
def run_filters(doc):
    """Apply the filters registered for the document's journal template.

    The template name is read from the ``journal`` metadata mapping
    (``journal.template``). When the ``filters_module`` metadata key is set,
    that module is imported first. The filter list registered under the
    template name in ``filters`` is then applied in order; when nothing is
    registered, ``default_filter`` is used.

    Args:
        doc: The panflute document to transform, or ``None``.

    Returns:
        The value returned by the last ``pf.run_filter`` call.
    """
    fallback_journal = {'template': 'default'}

    if doc is None:
        logger.warning('doc is None')
        journal = fallback_journal
        # No document means no metadata to read a module name from.
        # NOTE(review): pf.run_filter is then called with doc=None, which
        # presumably makes panflute read the document from stdin - confirm.
        filters_module = None
    else:
        journal = doc.get_metadata('journal')
        filters_module = doc.get_metadata('filters_module')

    if filters_module:
        logger.warning(f"Loading filters module: {filters_module}")
        importlib.import_module(filters_module)

    # The journal metadata may be absent or not a mapping when the filter is
    # run on a document that did not set it.
    if not isinstance(journal, dict):
        logger.warning(f'Missing or invalid journal metadata: {journal!r}. Using default template.')
        journal = fallback_journal

    template = journal.get("template")
    if template is None:
        logger.warning('journal template is not set')

    filters_ = filters.get(template)
    if filters_ is None:
        logger.warning(f'No filters found for journal template: {template}. Using default filter.')
        filters_ = [default_filter]

    for journal_filter in filters_:
        doc = pf.run_filter(action=journal_filter.action,
                            prepare=journal_filter.prepare,
                            finalize=journal_filter.finalize, doc=doc)

    return doc
105
+
106
+
107
def main(doc=None):
    """Entry point of the filter: transform a document and dump it.

    Args:
        doc: Document to transform. When ``None`` the document is loaded
            from standard input.

    Returns:
        The return value of ``pf.dump``.
    """
    # Only read stdin when the caller did not supply a document; the
    # argument used to be overwritten unconditionally.
    if doc is None:
        doc = pf.load(sys.stdin)
    doc = run_filters(doc)
    return pf.dump(doc)


if __name__ == '__main__':
    main()
texmark/logs.py ADDED
@@ -0,0 +1,3 @@
1
+ import logging
2
# Logger shared by all texmark modules, set to DEBUG level at import time.
logger = logging.getLogger("texmark")
logger.setLevel(level=logging.DEBUG)
@@ -0,0 +1,136 @@
1
+ import json
2
+ import panflute as pf
3
+ from panflute import stringify, run_filter, Header, RawBlock, RawInline, convert_text, Block
4
+ from texmark.logs import logger
5
+ import io
6
+
7
def panflute2latex(elements, wrap='none') -> str:
    """Render a flat sequence of panflute elements to a LaTeX string.

    Consecutive inline elements are grouped into a paragraph; block elements
    are kept as they are. The resulting document is dumped to JSON and
    converted with ``pf.convert_text``.

    Args:
        elements: Iterable of ``pf.Block`` and/or ``pf.Inline`` elements.
        wrap: Value of the ``--wrap`` option passed to the converter.

    Raises:
        TypeError: If an element is neither a block nor an inline.
    """
    blocks = []
    pending_inlines = []

    def flush_inlines():
        # Wrap the inlines gathered so far into one paragraph.
        if pending_inlines:
            blocks.append(pf.Para(*pending_inlines))
            pending_inlines.clear()

    for el in elements:
        if isinstance(el, pf.Block):
            # Inlines seen before a block become their own paragraph first
            flush_inlines()
            blocks.append(el)
        elif isinstance(el, pf.Inline):
            pending_inlines.append(el)
        else:
            raise TypeError(f"Unsupported element type: {type(el)}")

    # Trailing inlines form the last paragraph
    flush_inlines()

    # Serialize the document to JSON through a UTF-8 text wrapper
    raw = io.BytesIO()
    text_stream = io.TextIOWrapper(raw, encoding='utf-8')
    pf.dump(pf.Doc(*blocks), text_stream)
    text_stream.flush()
    json_ast_str = raw.getvalue().decode('utf-8')

    return pf.convert_text(
        json_ast_str,
        input_format='json',
        output_format='latex',
        extra_args=[f'--wrap={wrap}'],
    )
47
+
48
+
49
+
50
class SectionTracker:
    """Bookkeeping for the section that is currently being collected.

    ``sections`` maps the name of each finished section to a dict with its
    collected ``content`` list and header ``level``.
    """

    def __init__(self):
        self.sections = {}
        self.active_section, self.section_content, self.section_level = None, [], 0

    def reset(self):
        """Store the active section (if any) and clear the current state."""
        finished = self.active_section
        if finished:
            self.sections[finished] = {
                'content': self.section_content,
                'level': self.section_level,
            }
        self.active_section, self.section_content, self.section_level = None, [], 0
66
+
67
+
68
+ class SectionProcessor:
69
+ def __init__(self, extract_sections, sections_map={}, remap_command_sections={}):
70
+ self.extract_sections = extract_sections
71
+ self.sections_map = sections_map or {}
72
+ self.remap_command_sections = remap_command_sections or {}
73
+
74
+ def prepare(self, doc):
75
+ doc.tracker = SectionTracker()
76
+ doc.extract_sections = self.extract_sections
77
+ doc.sections_map = self.sections_map
78
+
79
+ def action(self, elem, doc):
80
+ tracker = doc.tracker
81
+ # logger.warning(f"check elem {elem} {stringify(elem)}")
82
+
83
+ # Header processing
84
+ if isinstance(elem, Header):
85
+ title = elem.identifier
86
+
87
+ # Check if we're entering a target section
88
+ if title in doc.extract_sections:
89
+ tracker.reset()
90
+ tracker.active_section = title
91
+ tracker.section_level = elem.level
92
+ # logger.warning(f"!!Remove {elem}")
93
+ return [] # Remove original header
94
+
95
+ # Check if we're exiting a section
96
+ if tracker.active_section and elem.level <= tracker.section_level:
97
+ # logger.warning(f"Exit {tracker.active_section} with {elem} {stringify(elem)}")
98
+ # logger.warning(f"Last element of {tracker.active_section} {tracker.section_content[-1]}")
99
+ # logger.warning(f"Remove last: {tracker.section_content[-1]} {stringify(tracker.section_content[-1])}")
100
+ tracker.section_content = tracker.section_content[:-1]
101
+ tracker.reset()
102
+
103
+ # Check if the header is a target section for remap header command
104
+ if title in self.remap_command_sections:
105
+ # Replace header with the remapped command
106
+ command = self.remap_command_sections[title]
107
+ return RawBlock(command, format='latex')
108
+
109
+
110
+ # Content collection
111
+ if tracker.active_section:
112
+ tracker.section_content.append(elem)
113
+ return [] # Remove from main flow
114
+
115
+
116
+ def finalize(self, doc):
117
+ tracker = doc.tracker
118
+ tracker.reset() # Capture last section
119
+
120
+ # Convert collected sections to LaTeX
121
+ for section in doc.extract_sections:
122
+ meta_key = doc.sections_map.get(section, section)
123
+ if section in tracker.sections:
124
+ inline_elements = tracker.sections[section]['content']
125
+ latex = panflute2latex(inline_elements)
126
+ doc.metadata[meta_key] = RawInline(latex, format='latex')
127
+
128
+
129
def main(doc=None):
    """Run a ``SectionProcessor`` with a fixed section list as a panflute filter."""
    section_names = ["introduction", "methods", "conclusions", "acknowledgements"]
    processor = SectionProcessor(extract_sections=section_names)
    return run_filter(
        processor.action,
        prepare=processor.prepare,
        finalize=processor.finalize,
        doc=doc,
    )


if __name__ == '__main__':
    main()
texmark/shared.py ADDED
@@ -0,0 +1,83 @@
1
+ import sys
2
+ import panflute as pf
3
+ from panflute import Image, Table
4
+ from texmark.logs import logger
5
+
6
+ def _run_action(action, elem, doc):
7
+ result = action(elem, doc)
8
+ if result is None:
9
+ return elem
10
+ return result
11
+
12
class Processor:
    """Bundle optional ``action`` / ``prepare`` / ``finalize`` callbacks.

    A missing callback makes the corresponding method return its first
    argument unchanged. The ``prepare`` and ``finalize`` callbacks are
    invoked with the document passed twice, i.e. as ``callback(doc, doc)``.
    """

    def __init__(self, action=None, prepare=None, finalize=None):
        self._action, self._prepare, self._finalize = action, prepare, finalize

    def action(self, elem, doc):
        """Apply the action callback to ``elem``, if one was given."""
        if not self._action:
            return elem
        return _run_action(self._action, elem, doc)

    def prepare(self, doc):
        """Apply the prepare callback to ``doc``, if one was given."""
        if not self._prepare:
            return doc
        return _run_action(self._prepare, doc, doc)

    def finalize(self, doc):
        """Apply the finalize callback to ``doc``, if one was given."""
        if not self._finalize:
            return doc
        return _run_action(self._finalize, doc, doc)
30
+
31
class JournalFilter:
    """A panflute filter assembled from a list of processors.

    A processor is either a plain callable ``(elem, doc)`` or an object with
    an ``action`` method and optional ``prepare`` / ``finalize`` methods.
    """

    def __init__(self, processors=None):
        self.processors = processors or []

    def prepare(self, doc):
        """Call ``prepare(doc)`` on every processor that defines it."""
        for processor in self.processors:
            if not hasattr(processor, "prepare"):
                continue
            processor.prepare(doc)

    def action(self, elem, doc):
        """Normalize URLs, apply the figure/table hook, then run all processors."""
        # Remove leading slashes to make the URL repo-root relative (like GitHub)
        if hasattr(elem, 'url') and elem.url.startswith('/'):
            elem.url = elem.url.lstrip('/')

        if isinstance(elem, Image):
            elem = _run_action(self.transform_figure, elem, doc)
        elif isinstance(elem, Table):
            elem = _run_action(self.transform_table, elem, doc)

        # Each processor receives the result of the previous one.
        for processor in self.processors:
            step = processor if callable(processor) else processor.action
            elem = _run_action(step, elem, doc)

        return elem

    def finalize(self, doc):
        """Call ``finalize(doc)`` on every processor that defines it."""
        for processor in self.processors:
            if not hasattr(processor, "finalize"):
                continue
            processor.finalize(doc)

    def transform_table(self, elem, doc):
        """Hook for tables; returns ``None`` here, which leaves the element as is."""
        return None

    def transform_figure(self, elem, doc):
        """Hook for images; returns ``None`` here, which leaves the element as is."""
        return None
73
+
74
+
75
# Registry of filters, keyed by journal template name.
filters = {}


def register(name):
    """Return a decorator that stores the decorated object in ``filters[name]``.

    The decorated object is returned unchanged, so the decorated name stays
    usable after registration.
    """
    def decorator(journal_filter):
        filters[name] = journal_filter
        return journal_filter

    # Without this return, `@register("x")` evaluated to None and applying
    # it as a decorator raised TypeError.
    return decorator
81
+
82
# Filter without processors, registered under the "default" template name.
default_filter = JournalFilter()
filters.update({"default": [default_filter]})
@@ -0,0 +1 @@
1
+ $body$
@@ -0,0 +1,22 @@
1
+ File: README_copernicus_package_7_11.txt
2
+ -------------------------------------------------------------------------
3
+ This is a README file for the Copernicus Publications LaTeX Macro Package
4
+ copernicus_package.zip in the version 7.11, 9 April 2025
5
+ -------------------------------------------------------------------------
6
+ It consists of several files, each with its separate copyright.
7
+ This specific archive is collected for journals published by
8
+ Copernicus Publications (Copernicus GmbH).
9
+
10
+ Copyright (C) 2025 Copernicus GmbH
11
+
12
+ E-mail: publications@copernicus.org
13
+ URL: https://publications.copernicus.org
14
+
15
+
16
+
17
+ Content:
18
+ - copernicus.cls: The LaTeX2e class file designed for Copernicus Publications journals. Current Version 10.1.26, 14 March 2025
19
+ - copernicus.cfg: The configuration file containing journal-specific information used by the class file. Last update 9 March 2024
20
+ - copernicus.bst: The bibliographic style file for BibTeX. Current Version 1.6, 20 October 2023
21
+ - pdfscreencop.sty / pdfscreen.sty
22
+ - template.tex: A LaTeX template in journal style.