PyPI - txtdown - Versions diffs - 0.2.0__py3-none-any.whl - Mend

txtdown 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

txtdown/__init__.py +29 -0
txtdown/models.py +181 -0
txtdown/parser.py +379 -0
txtdown/writer.py +106 -0
txtdown-0.2.0.dist-info/METADATA +214 -0
txtdown-0.2.0.dist-info/RECORD +9 -0
txtdown-0.2.0.dist-info/WHEEL +5 -0
txtdown-0.2.0.dist-info/licenses/LICENSE +21 -0
txtdown-0.2.0.dist-info/top_level.txt +1 -0

txtdown/__init__.py ADDED Viewed

@@ -0,0 +1,29 @@
+"""txtdown: Minimal markup for Latin text collections.
+The package re-exports the data models (``Document``, ``Section``, ``Line``,
+``Metadata``) together with the ``parse`` and ``write`` entry points.
+``parse`` is strict by default: it raises ``ValueError`` when the front matter
+block or its ``work`` field is missing. Pass ``strict=False`` for fragments.
+Example usage:
+    >>> from txtdown import parse
+    >>> doc = parse("sulpicia.txtd")
+    >>> print(doc.metadata.author)
+    Sulpicia
+    >>> print(doc.sections[0].lines[0].text)
+    Tandem venit amor, qualem texisse pudori
+    >>> line = doc.get("1.3")
+    >>> print(line.text)
+    exorata meis illum Cytherea Camenis
+"""
+# Public API: data models, parser entry point, writer entry point.
+from .models import Document, Line, Metadata, Section
+from .parser import parse
+from .writer import write
+__version__ = "0.2.0"
+# Names exported by ``from txtdown import *``.
+__all__ = [
+    "Document",
+    "Line",
+    "Metadata",
+    "Section",
+    "parse",
+    "write",
+    "__version__",
+]

txtdown/models.py ADDED Viewed

@@ -0,0 +1,181 @@
+"""Data models for txtdown documents."""
+from dataclasses import dataclass, field
+from typing import Any
+@dataclass
+class Metadata:
+    """Document metadata from YAML front matter.
+    Attributes:
+        author: Author name
+        work: Work title
+        source: Source URL or reference
+        scope: For partial files (e.g., "1" or "1-12")
+        extras: Additional key-value pairs (every front matter key that is
+            not one of the four named fields above)
+    """
+    author: str | None = None
+    work: str | None = None
+    source: str | None = None
+    scope: str | None = None
+    extras: dict[str, Any] = field(default_factory=dict)
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "Metadata":
+        """Create Metadata from a dictionary (e.g., parsed YAML).
+        Args:
+            data: Mapping of front matter keys to values. Must support
+                ``.items()`` and ``.get()``.
+        Returns:
+            Metadata whose known fields are taken from ``data`` (None when
+            absent) and whose ``extras`` holds all remaining keys.
+        """
+        known_fields = {"author", "work", "source", "scope"}
+        extras = {k: v for k, v in data.items() if k not in known_fields}
+        # Ensure scope is always a string (YAML may parse "1" as int)
+        scope = data.get("scope")
+        if scope is not None:
+            scope = str(scope)
+        # author/work/source are passed through as-is, without coercion
+        return cls(
+            author=data.get("author"),
+            work=data.get("work"),
+            source=data.get("source"),
+            scope=scope,
+            extras=extras,
+        )
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for YAML serialization.
+        Known fields are included only when truthy, so None and empty
+        strings are omitted from the result.
+        """
+        result: dict[str, Any] = {}
+        if self.author:
+            result["author"] = self.author
+        if self.work:
+            result["work"] = self.work
+        if self.source:
+            result["source"] = self.source
+        if self.scope:
+            result["scope"] = self.scope
+        # Extras are merged last: an extras key that reuses a known field
+        # name would overwrite the value set above.
+        result.update(self.extras)
+        return result
+@dataclass
+class Line:
+    """A single line of text.
+    Attributes:
+        text: The line content, with speaker/quote/number markup removed
+            when the line was produced by the parser
+        number: Line number within the section. The parser auto-increments
+            from 1 but honours an explicit leading "N." prefix, so numbers
+            are not guaranteed to be contiguous.
+        speaker: Speaker name for dramatic texts (None for non-dialogue)
+        label: Editorial line label when it differs from number (e.g., "983a")
+        is_quote: True if the line is a cross-source quotation (``>`` markup),
+            i.e. verbatim text quoted from another author/work
+    """
+    text: str
+    number: int
+    speaker: str | None = None
+    label: str | None = None
+    is_quote: bool = False
+    def __str__(self) -> str:
+        """Return only the line text (no number, speaker or label)."""
+        return self.text
+@dataclass
+class Section:
+    """A section of text (poem, chapter, etc.).
+    Attributes:
+        id: Section identifier (number or name)
+        lines: List of lines in this section
+        is_numbered: Whether the ID is a number (vs. a name)
+        title: Optional section title
+        metadata: Section-specific metadata (supersedes document metadata)
+    Note:
+        Indexing with [] uses 1-based indexing to match scholarly citations.
+        Use section[1] for the first line, not section[0].
+        The index is positional: it selects by position in ``lines`` and does
+        not consult ``Line.number``.
+    """
+    id: str
+    lines: list[Line] = field(default_factory=list)
+    is_numbered: bool = True
+    title: str | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+    @property
+    def text(self) -> str:
+        """Return section text as a single string (lines joined by newline)."""
+        return "\n".join(line.text for line in self.lines)
+    def __len__(self) -> int:
+        """Return the number of lines in the section."""
+        return len(self.lines)
+    def __getitem__(self, index: int) -> Line:
+        """Get line by 1-indexed position.
+        Raises:
+            IndexError: If ``index`` is below 1 or above the line count.
+        """
+        # Explicit bounds check so 0 and negative indexes are rejected
+        # instead of wrapping around as they would on a plain list.
+        if index < 1 or index > len(self.lines):
+            raise IndexError(f"Line {index} out of range (1-{len(self.lines)})")
+        return self.lines[index - 1]
+@dataclass
+class Document:
+    """A complete txtdown document.
+    Attributes:
+        metadata: Document metadata
+        sections: List of sections
+    Note:
+        Indexing with [] uses 1-based indexing to match scholarly citations.
+        Use doc[1] for the first section, not doc[0].
+        For citation-based access, use doc.get("1") or doc.get("1.3").
+    """
+    metadata: Metadata = field(default_factory=Metadata)
+    sections: list[Section] = field(default_factory=list)
+    def get(self, citation: str) -> Line | Section:
+        """Retrieve content by citation.
+        Args:
+            citation: Citation string like "2" (section) or "2.3" (section.line)
+        Returns:
+            Section if single-level citation, Line if two-level
+        Raises:
+            KeyError: If section or line not found
+        """
+        parts = citation.split(".")
+        # Find section
+        section_id = parts[0]
+        section = None
+        for s in self.sections:
+            if s.id == section_id:
+                section = s
+                break
+        if section is None:
+            raise KeyError(f"Section '{section_id}' not found")
+        # Return section or line
+        if len(parts) == 1:
+            return section
+        line_ref = parts[1]
+        # Try label lookup first (handles "983a" etc.)
+        for line in section.lines:
+            if line.label == line_ref:
+                return line
+        # Fall back to numeric line number
+        try:
+            line_num = int(line_ref)
+            return section[line_num]
+        except (ValueError, IndexError) as e:
+            msg = f"Line '{line_ref}' not found in section '{section_id}'"
+            raise KeyError(msg) from e
+    def __len__(self) -> int:
+        return len(self.sections)
+    def __getitem__(self, index: int) -> Section:
+        """Get section by 1-indexed number."""
+        if index < 1 or index > len(self.sections):
+            raise IndexError(f"Section {index} out of range (1-{len(self.sections)})")
+        return self.sections[index - 1]

txtdown/parser.py ADDED Viewed

@@ -0,0 +1,379 @@
+"""Parser for txtdown format."""
+import re
+import warnings
+from pathlib import Path
+from typing import Any
+import yaml
+from .models import Document, Line, Metadata, Section
+# Pattern for section separator: --- optionally followed by ID
+# Must be at start of line, at least 3 dashes
+# Group 1 captures everything after the dashes (the raw header, possibly empty)
+SECTION_SEP_PATTERN = re.compile(r"^-{3,}\s*(.*)$")
+# Pattern for speaker markup: @SingleWord: speech text
+# \w+ means a speaker name cannot contain spaces or punctuation
+SPEAKER_PATTERN = re.compile(r"^@(\w+):\s*(.*)")
+# Pattern for cross-source quotation: > verbatim quoted text
+# The single optional space after > is part of the marker, not the text.
+QUOTE_PATTERN = re.compile(r"^>\s?(.*)")
+# Pattern for leading explicit line number: "6. text" or "983. text"
+# At least one whitespace character is required after the dot
+LEADING_NUMBER_PATTERN = re.compile(r"^(\d+)\.\s+(.*)")
+# Pattern for trailing line label: "text         980" or "text         983a"
+# Requires 2+ whitespace chars before the label to avoid false positives
+# The label is digits followed by at most one lowercase ASCII letter
+TRAILING_LABEL_PATTERN = re.compile(r"^(.*?)\s{2,}(\d+[a-z]?)\s*$")
+def parse(source: str | Path, *, strict: bool = True) -> Document:
+    """Parse a txtdown file or string.
+    Args:
+        source: File path or txtdown content string. A ``Path`` is always
+            read from disk. A ``str`` is read from disk only when
+            ``_looks_like_path`` classifies it as a path; otherwise it is
+            parsed as content.
+        strict: When True (default), require a YAML front matter block with a
+            ``work`` field and raise ValueError if either is missing. Pass
+            ``strict=False`` to parse a fragment (e.g. a single line or section)
+            without metadata.
+    Returns:
+        Parsed Document object
+    Raises:
+        ValueError: In strict mode, when the front matter block or the ``work``
+            field is missing.
+    Note:
+        Errors raised while reading the file are not caught here and
+        propagate to the caller.
+    """
+    # Handle file path vs string
+    is_path = isinstance(source, Path)
+    is_path = is_path or (isinstance(source, str) and _looks_like_path(source))
+    if is_path:
+        path = Path(source)
+        # Files are always decoded as UTF-8
+        content = path.read_text(encoding="utf-8")
+    else:
+        content = source
+    return _parse_content(content, strict=strict)
+def _looks_like_path(s: str) -> bool:
+    """Heuristic to detect if string is a file path."""
+    # Empty string is not a path
+    if not s or not s.strip():
+        return False
+    # If it starts with ---, it's content
+    if s.strip().startswith("---"):
+        return False
+    # If it contains newlines, it's content
+    if "\n" in s:
+        return False
+    # If it ends with .txtdown or .td, it's a path
+    if s.endswith((".txtd", ".txtdown")):
+        return True
+    # If it exists as a file (not directory), it's a path
+    p = Path(s)
+    return p.exists() and p.is_file()
+def _parse_content(content: str, strict: bool = True) -> Document:
+    """Parse txtdown content string.
+    Args:
+        content: Full txtdown text (front matter plus body)
+        strict: When True, validate the result with ``_validate``
+    Returns:
+        Document built from the front matter and the body sections
+    Raises:
+        ValueError: In strict mode, when validation fails
+    """
+    # Split on "\n" only; a "\r" from CRLF input stays attached to its line
+    lines = content.split("\n")
+    # Extract front matter (body_start > 0 only when a closed block was found)
+    metadata, body_start = _parse_front_matter(lines)
+    had_front_matter = body_start > 0
+    # Parse body into sections
+    sections = _parse_sections(lines[body_start:])
+    doc = Document(metadata=metadata, sections=sections)
+    if strict:
+        _validate(doc, had_front_matter)
+    return doc
+def _validate(doc: Document, had_front_matter: bool) -> None:
+    """Enforce the required document structure in strict mode.
+    Args:
+        doc: The parsed document to check
+        had_front_matter: Whether a closed front matter block was found
+    Raises:
+        ValueError: If there was no front matter block, or if the ``work``
+            metadata field is missing or empty
+    """
+    # Check the block first so the error names the more basic problem
+    if not had_front_matter:
+        raise ValueError(
+            "txtdown requires a YAML front matter block (--- ... ---). "
+            "Pass strict=False to parse a fragment without metadata."
+        )
+    if not doc.metadata.work:
+        raise ValueError(
+            "txtdown requires a 'work' field in the front matter. "
+            "Pass strict=False to parse without it."
+        )
+def _parse_front_matter(lines: list[str]) -> tuple[Metadata, int]:
+    """Parse YAML front matter.
+    Returns:
+        Tuple of (Metadata, index of first body line)
+    """
+    # Find opening ---
+    start = 0
+    while start < len(lines) and not lines[start].strip():
+        start += 1
+    if start >= len(lines) or lines[start].strip() != "---":
+        return Metadata(), 0
+    # Find closing ---
+    end = start + 1
+    while end < len(lines):
+        line = lines[end].strip()
+        if line == "---" or line == "...":
+            break
+        end += 1
+    if end >= len(lines):
+        # No closing delimiter - treat as no front matter
+        return Metadata(), 0
+    # Parse YAML
+    yaml_content = "\n".join(lines[start + 1 : end])
+    try:
+        data = yaml.safe_load(yaml_content) or {}
+    except yaml.YAMLError as e:
+        warnings.warn(f"Failed to parse YAML front matter: {e}", stacklevel=3)
+        data = {}
+    return Metadata.from_dict(data), end + 1
+def _parse_section_header(header: str) -> tuple[str | None, str | None]:
+    """Parse section header into ID and title.
+    Formats supported:
+        "99" -> id="99", title=None
+        "99: Title here" -> id="99", title="Title here"
+        "prooemium" -> id="prooemium", title=None
+        "prooemium: Introduction" -> id="prooemium", title="Introduction"
+    Args:
+        header: Text that followed the dashes on the separator line
+    Returns:
+        Tuple of (id, title), either may be None. The id is an empty string
+        (not None) when the header starts with a colon, e.g. ": Title".
+    """
+    if not header:
+        return None, None
+    # Check for "id: title" format (only the first colon separates the two)
+    if ":" in header:
+        id_part, title_part = header.split(":", 1)
+        # An empty title collapses to None; the id is returned as stripped
+        return id_part.strip(), title_part.strip() or None
+    return header.strip(), None
+def _is_metadata_line(line: str) -> bool:
+    """Check if a line looks like YAML metadata (key: value)."""
+    stripped = line.strip()
+    if not stripped:
+        return False
+    # Must have colon with content on both sides
+    if ":" not in stripped:
+        return False
+    # Split on first colon
+    key, _, value = stripped.partition(":")
+    # Key must be a simple identifier (no spaces, alphanumeric + underscore)
+    if not key or not re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", key):
+        return False
+    return True
+def _parse_section_metadata(
+    lines: list[str], start_idx: int
+) -> tuple[dict[str, Any], int]:
+    """Parse section metadata from lines immediately following section separator.
+    Args:
+        lines: All lines in the section
+        start_idx: Index to start looking for metadata
+    Returns:
+        Tuple of (metadata dict, index of first content line)
+    """
+    metadata: dict[str, Any] = {}
+    idx = start_idx
+    # Skip any leading blank lines - metadata must immediately follow separator
+    # Actually, no - metadata must IMMEDIATELY abut the separator (no blank line)
+    # So if first line is blank, there's no section metadata
+    while idx < len(lines):
+        line = lines[idx]
+        # Blank line signals end of metadata, start of content
+        if not line.strip():
+            break
+        # Check if this looks like metadata
+        if _is_metadata_line(line):
+            key, _, value = line.strip().partition(":")
+            value = value.strip()
+            # Try to parse as YAML-ish value (bool, int, etc.)
+            if value.lower() == "true":
+                metadata[key] = True
+            elif value.lower() == "false":
+                metadata[key] = False
+            elif value.isdigit():
+                metadata[key] = int(value)
+            else:
+                metadata[key] = value
+            idx += 1
+        else:
+            # Not metadata - this is content
+            break
+    return metadata, idx
+def _parse_sections(lines: list[str]) -> list[Section]:
+    """Parse body into sections.
+    The body is split at separator lines (three or more dashes, optionally
+    followed by an ID and title). Text before the first separator forms a
+    section of its own. Sections without any non-blank content are dropped
+    and do not consume an auto-number.
+    Args:
+        lines: Body lines (everything after the front matter)
+    Returns:
+        Sections in document order
+    """
+    sections: list[Section] = []
+    current_lines: list[str] = []
+    current_id: str | None = None
+    current_title: str | None = None
+    current_metadata: dict[str, Any] = {}
+    # Counts emitted sections; used as the ID when a section has none
+    section_counter = 0
+    # NOTE(review): the parameter name shadows the outer ``lines`` list
+    def has_content(lines: list[str]) -> bool:
+        """Check if lines have any non-whitespace content."""
+        return any(line.strip() for line in lines)
+    i = 0
+    while i < len(lines):
+        line = lines[i]
+        match = SECTION_SEP_PATTERN.match(line)
+        if match:
+            # Save previous section only if it has actual content
+            if has_content(current_lines):
+                section_counter += 1
+                # An empty or missing ID falls back to the running counter
+                section_id = current_id if current_id else str(section_counter)
+                sec = _make_section(
+                    section_id, current_lines, current_title, current_metadata
+                )
+                sections.append(sec)
+            current_lines = []
+            current_metadata = {}
+            # Extract ID and title from separator line
+            header = match.group(1).strip()
+            current_id, current_title = _parse_section_header(header)
+            # Check for section metadata immediately following separator
+            i += 1
+            if i < len(lines):
+                current_metadata, content_start = _parse_section_metadata(lines, i)
+                i = content_start
+                continue  # Don't increment i again at end of loop
+        else:
+            current_lines.append(line)
+        i += 1
+    # Don't forget the last section
+    if has_content(current_lines):
+        section_counter += 1
+        section_id = current_id if current_id else str(section_counter)
+        sec = _make_section(
+            section_id, current_lines, current_title, current_metadata
+        )
+        sections.append(sec)
+    return sections
+def _make_section(
+    section_id: str,
+    raw_lines: list[str],
+    title: str | None = None,
+    metadata: dict[str, Any] | None = None,
+) -> Section:
+    """Create a Section from raw lines.
+    Args:
+        section_id: ID to assign to the section
+        raw_lines: Unprocessed text lines of the section. The list is
+            modified in place: leading and trailing blank lines are removed.
+        title: Optional section title
+        metadata: Optional section metadata; None becomes an empty dict
+    Returns:
+        Section whose ``lines`` hold one Line per non-blank raw line
+    """
+    # Strip leading/trailing blank lines
+    while raw_lines and not raw_lines[0].strip():
+        raw_lines.pop(0)
+    while raw_lines and not raw_lines[-1].strip():
+        raw_lines.pop()
+    # Create numbered Line objects (only for non-empty lines)
+    lines: list[Line] = []
+    last_number = 0
+    for text in raw_lines:
+        if text.strip():  # Skip blank lines for numbering
+            # Cross-source quotation: > marks verbatim text quoted from another
+            # source. Quoted text is preserved as-is (no number/label extraction).
+            # The marker is detected after removing leading indentation.
+            quote_match = QUOTE_PATTERN.match(text.lstrip())
+            if quote_match:
+                number = last_number + 1
+                last_number = number
+                lines.append(
+                    Line(text=quote_match.group(1), number=number, is_quote=True)
+                )
+                continue
+            text, number, label = _extract_line_numbering(text, last_number)
+            last_number = number
+            # Speaker markup is checked on the text left after numbering was
+            # removed, so "6. @Name: speech" yields both a number and a speaker
+            speaker_match = SPEAKER_PATTERN.match(text)
+            if speaker_match:
+                speaker = speaker_match.group(1)
+                speech = speaker_match.group(2)
+                lines.append(
+                    Line(text=speech, number=number, speaker=speaker, label=label)
+                )
+            else:
+                lines.append(Line(text=text, number=number, label=label))
+    # Determine if ID is numeric
+    is_numbered = section_id.isdigit()
+    return Section(
+        id=section_id,
+        lines=lines,
+        is_numbered=is_numbered,
+        title=title,
+        metadata=metadata or {},
+    )
+def _extract_line_numbering(
+    text: str, last_number: int
+) -> tuple[str, int, str | None]:
+    """Extract explicit line numbering from a line of text.
+    Handles three styles:
+    - Leading prefix: "6. suave etiam..." → number=6, text="suave etiam..."
+    - Trailing label: "servo id;         980" → number=auto, label="980"
+    - Implicit: auto-increment from last_number
+    A line may carry both a trailing label and a leading prefix; the label
+    is removed first and the prefix is then looked for in what remains.
+    Args:
+        text: Raw line text
+        last_number: Previous line's number (for auto-increment)
+    Returns:
+        Tuple of (cleaned_text, number, label)
+    """
+    label: str | None = None
+    # Check for trailing label first (e.g., "text         980" or "983a")
+    trailing_match = TRAILING_LABEL_PATTERN.match(text)
+    if trailing_match:
+        # Drop the label and the whitespace that separated it from the text
+        text = trailing_match.group(1).rstrip()
+        label = trailing_match.group(2)
+    # Check for leading explicit number (e.g., "6. text")
+    leading_match = LEADING_NUMBER_PATTERN.match(text)
+    if leading_match:
+        number = int(leading_match.group(1))
+        text = leading_match.group(2)
+        return text, number, label
+    # Implicit: auto-increment
+    number = last_number + 1
+    return text, number, label

txtdown/writer.py ADDED Viewed

@@ -0,0 +1,106 @@
+"""Writer for txtdown format."""
+from pathlib import Path
+import yaml
+from .models import Document
+def write(doc: Document, path: str | Path | None = None) -> str:
+    """Write a Document to txtdown format.
+    Args:
+        doc: Document to serialize
+        path: Optional file path to write to. When None, nothing is written
+            and the serialized text is only returned.
+    Returns:
+        The txtdown content as a string
+    """
+    content = _serialize(doc)
+    if path is not None:
+        # Output is always encoded as UTF-8
+        Path(path).write_text(content, encoding="utf-8")
+    return content
+def _serialize(doc: Document) -> str:
+    """Serialize Document to txtdown string."""
+    parts: list[str] = []
+    # Front matter
+    meta_dict = doc.metadata.to_dict()
+    if meta_dict:
+        parts.append("---")
+        # Use yaml.dump with default_flow_style=False for readable output
+        yaml_str = yaml.dump(meta_dict, default_flow_style=False, allow_unicode=True)
+        parts.append(yaml_str.rstrip())
+        parts.append("---")
+        parts.append("")
+    # Sections
+    for i, section in enumerate(doc.sections):
+        # Section separator (except before first section)
+        if i > 0:
+            parts.append("")
+            parts.append("---")
+        # Add explicit ID if section has non-numeric or non-sequential ID
+        expected_id = str(i + 1)
+        needs_header = section.id != expected_id or section.title
+        if needs_header:
+            # Build header: "--- id" or "--- id: title"
+            if section.title:
+                header = f"--- {section.id}: {section.title}"
+            else:
+                header = f"--- {section.id}"
+            # Rewrite the separator with ID/title
+            if i > 0:
+                parts[-1] = header
+            else:
+                # First section with explicit ID
+                parts.append(header)
+        # Section metadata (immediately after separator, no blank line)
+        if section.metadata:
+            for key, value in section.metadata.items():
+                if isinstance(value, bool):
+                    value_str = "true" if value else "false"
+                else:
+                    value_str = str(value)
+                parts.append(f"{key}: {value_str}")
+        # Blank line before content
+        parts.append("")
+        # Section content
+        auto_number = 0
+        for line in section.lines:
+            auto_number += 1
+            # Build text with speaker or quote markup if needed
+            if line.is_quote:
+                text = f"> {line.text}"
+            elif line.speaker:
+                text = f"@{line.speaker}: {line.text}"
+            else:
+                text = line.text
+            # Add leading prefix if number differs from auto-increment
+            if line.number != auto_number:
+                text = f"{line.number}. {text}"
+                auto_number = line.number
+            # Add trailing label if present
+            if line.label:
+                text = f"{text}         {line.label}"
+            parts.append(text)
+    # Ensure trailing newline
+    content = "\n".join(parts)
+    if not content.endswith("\n"):
+        content += "\n"
+    return content

txtdown-0.2.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,214 @@
+Metadata-Version: 2.4
+Name: txtdown
+Version: 0.2.0
+Summary: Minimal markup for Latin text collections
+Author: Patrick J. Burns
+License: MIT
+Project-URL: Homepage, https://github.com/diyclassics/txtdown
+Project-URL: Repository, https://github.com/diyclassics/txtdown
+Project-URL: Changelog, https://github.com/diyclassics/txtdown/blob/main/CHANGELOG.md
+Project-URL: Issues, https://github.com/diyclassics/txtdown/issues
+Keywords: latin,markup,text,philology,digital-humanities,nlp
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Text Processing :: Markup
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: pyyaml>=6.0
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
+Requires-Dist: pytest-cov>=4.0; extra == "dev"
+Requires-Dist: ruff>=0.1.0; extra == "dev"
+Dynamic: license-file
+# txtdown
+Minimal markup for Latin text collections using human-readable markup with inferrable hierarchical structure for scholarly citation.
+## Installation
+```bash
+pip install git+https://github.com/diyclassics/txtdown.git
+```
+## Quick Start
+```python
+from txtdown import parse, write
+# Parse a .txtd file
+doc = parse("sulpicia.txtd")
+# Access metadata
+print(doc.metadata.author)  # "Sulpicia"
+print(doc.metadata.work)    # "Epistulae"
+# Access by citation
+line = doc.get("2.3")       # Section 2, line 3
+section = doc.get("1")      # Entire section 1
+# Iterate sections and lines
+for section in doc.sections:
+    for line in section.lines:
+        print(f"{section.id}.{line.number}: {line.text}")
+# Write back to file (round-trip safe)
+write(doc, "output.txtd")
+```
+## Format Specification
+A `.txtd` file consists of a YAML front matter block followed by sections separated by horizontal rules (`---`). The front matter block is required and must include a `work` field; `parse()` raises `ValueError` otherwise. To parse a fragment without metadata (e.g. a single line or section), pass `strict=False`.
+### Basic Structure
+```
+---
+author: Sulpicia
+work: Epistulae
+source: https://thelatinlibrary.com/sulpicia.html
+---
+--- 1
+Tandem venit amor, qualem texisse pudori
+    quam nudasse alicui sit mihi fama magis.
+exorata meis illum Cytherea Camenis
+    attulit in nostrum deposuitque sinum.
+etc.
+--- 2
+Invisus natalis adest, qui rure molesto
+    et sine Cerintho tristis agendus erit.
+etc.
+```
+### Sections
+- Sections are separated by `---` (three or more hyphens)
+- Sections auto-number (1, 2, 3...) unless given explicit IDs (best practice)
+- Explicit section ID: `--- prooemium` or `--- 1a`
+- Section with title: `--- prooemium: Introduction`
+### Lines (for verse)
+- Lines auto-number within each section (1, 2, 3...)
+- Blank lines don't count toward line numbering
+- Access via citation: `doc.get("2.3")` returns section 2, line 3
+**Line indentation** (`mode: verse`): Leading whitespace indicates poetic structure (e.g., pentameter lines in elegiac couplets):
+```
+Tandem venit amor, qualem texisse pudori
+    quam nudasse alicui sit mihi fama magis.
+```
+The parser preserves indentation. For NLP, TxtdownReader strips leading whitespace when joining lines for sentence segmentation.
+### Speaker Markup (dramatic texts)
+For dramatic texts, use `@Speaker:` at the start of a line to mark speaker attribution:
+```
+@Diocletianus: Quid sibi vult ista, quae vos agitat, fatuitas?
+@Agapes: quod signum fatuitatis nobis inesse deprehendis?
+@Diocletianus: Evidens magnumque.
+```
+The parser extracts the speaker name into `line.speaker` and keeps `line.text` as pure speech text — ideal for NLP pipelines that need clean text without markup.
+```python
+doc = parse("dulcitius.txtd")
+for line in doc.sections[0].lines:
+    print(f"{line.speaker}: {line.text}")
+# Diocletianus: Quid sibi vult ista...
+```
+Non-speaker lines (stage directions, prose) have `line.speaker = None`. Speaker markup round-trips through `write()`.
+### Cross-source Quotation
+Use `>` at the start of a line to mark text quoted verbatim from *another* literary
+source — an author embedding a poet's verse in their own prose, for example. This
+repurposes the familiar blockquote convention for the citational habits of classical texts:
+```
+Quamquam Ennius recte:
+> Amicus certus in re incerta cernitur,
+tamen haec duo levitatis et infirmitatis plerosque convincunt.
+```
+The parser strips the `>` marker and flags the line with `line.is_quote = True`, keeping
+`line.text` as clean quoted text. Consecutive `>` lines form a multi-line quotation:
+```
+> Negat quis, nego; ait, aio; postremo imperavi egomet mihi
+> Omnia adsentari,
+```
+```python
+doc = parse("cicero-de-amicitia.txtd")
+quotes = [line.text for s in doc.sections for line in s.lines if line.is_quote]
+# ['Amicus certus in re incerta cernitur,', ...]
+```
+Non-quote lines have `line.is_quote = False`. Quotation markup round-trips through `write()`.
+See `examples/cicero-de-amicitia.txtd` (Cicero quoting Ennius and Terence) and
+`examples/augustine-civ-dei-1.2.txtd` (Augustine quoting Virgil).
+### Metadata
+| Field | Description |
+|-------|-------------|
+| `work` | Work title (**required**) |
+| `author` | Author name |
+| `source` | Source URL or reference |
+| `scope` | Portion of work in file (e.g., `1-6` for books 1-6) |
+Additional fields are preserved in `metadata.extras`.
+## API Reference
+### Functions
+- `parse(source: str | Path, *, strict: bool = True) -> Document` — Parse a `.txtd` file or string. Strict by default: raises `ValueError` if the front matter block or `work` field is missing; pass `strict=False` for fragments.
+- `write(doc: Document, path: str | Path | None = None) -> str` — Write to file if path given; always returns serialized string
+### Classes
+- `Document` — Container with `metadata: Metadata` and `sections: list[Section]`
+- `Section` — Container with `id: str`, `lines: list[Line]`, optional `title` and `metadata`
+- `Line` — Container with `text: str`, `number: int`, optional `speaker: str | None` and `label: str | None`, and `is_quote: bool` (cross-source quotation)
+- `Metadata` — Container with `author`, `work`, `source`, `scope`, and `extras` dict
+## Development
+```bash
+# Clone and install dev dependencies
+git clone https://github.com/diyclassics/txtdown.git
+cd txtdown
+pip install -e ".[dev]"
+# Run tests
+pytest tests/ -v
+# Run with coverage
+pytest tests/ --cov=txtdown --cov-report=term-missing
+```
+## Project History
+The idea for txtdown originated in January 2018, inspired by the need for a document format for Latin text collections that balanced the simplicity of plaintext with the more involved markup of XML-based formats like TEI. The goal was to create a format that is both human-readable and computer-tractable, supporting hierarchical structures, fundamental annotations, and embedded metadata. Txtdown has since been influenced by ongoing work on annotation projects such as the [Representing Women Authorship in the Latin Treebanks (RWALT)](https://diyclassics.github.io/rwalt-site/) project.
+## License
+MIT

txtdown-0.2.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+txtdown/__init__.py,sha256=Y8g1ClbPS1UFgrV612MATjF_WheoKSjw4tVX1u2apJI,628
+txtdown/models.py,sha256=IiroQFzLjEQ5XCdNBS34ECfPPrZevYKfOVZ_XQlIXJs,5552
+txtdown/parser.py,sha256=SFc7tKC73-6VN0Y6E1qWuAJcXS2iBlQAf2Dz0OUF3RI,12132
+txtdown/writer.py,sha256=6RaFTWSa8i7DdDbCkM8jYf7cLChrxHNNdBES2ZlOqHw,3124
+txtdown-0.2.0.dist-info/licenses/LICENSE,sha256=Fh2wAEotBNBqY258xRJU-fCwlxsVQwvxpT2V1uPKwfs,1078
+txtdown-0.2.0.dist-info/METADATA,sha256=gBqwqVYHY7SVyV2gYYUwbqJJCRSSBjUdZgemvLbTHYI,7319
+txtdown-0.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+txtdown-0.2.0.dist-info/top_level.txt,sha256=e7Jb95B7fl_8SC-WvP20XR5iQnNGgFy1_AZ9LkbHwBA,8
+txtdown-0.2.0.dist-info/RECORD,,

txtdown-0.2.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (82.0.1)
+Root-Is-Purelib: true
+Tag: py3-none-any

txtdown-0.2.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2018-2026 Patrick J. Burns
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

txtdown-0.2.0.dist-info/top_level.txt ADDED Viewed

@@ -0,0 +1 @@
+txtdown