PyPI - sub-checker - Versions diffs - 0.1.0__py3-none-any.whl - Mend

sub-checker 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

sub_checker/__init__.py +3 -0
sub_checker/agents/__init__.py +0 -0
sub_checker/agents/base.py +448 -0
sub_checker/agents/citation_claim.py +142 -0
sub_checker/agents/citation_exist.py +100 -0
sub_checker/agents/citation_format.py +94 -0
sub_checker/agents/figure_table.py +62 -0
sub_checker/agents/journal_guidelines.py +110 -0
sub_checker/agents/logic.py +45 -0
sub_checker/agents/typo_grammar.py +58 -0
sub_checker/api.py +239 -0
sub_checker/cli.py +195 -0
sub_checker/config.py +113 -0
sub_checker/env.py +23 -0
sub_checker/eval_runner.py +319 -0
sub_checker/harness/__init__.py +0 -0
sub_checker/harness/dedup.py +86 -0
sub_checker/harness/deterministic.py +284 -0
sub_checker/harness/reviewer.py +409 -0
sub_checker/i18n.py +98 -0
sub_checker/logging_config.py +175 -0
sub_checker/models.py +98 -0
sub_checker/orchestrator.py +278 -0
sub_checker/parsers/__init__.py +0 -0
sub_checker/parsers/docx_parser.py +185 -0
sub_checker/pipeline.py +73 -0
sub_checker/reporters/__init__.py +0 -0
sub_checker/reporters/html_reporter.py +531 -0
sub_checker/reporters/json_reporter.py +55 -0
sub_checker/reporters/markdown_reporter.py +60 -0
sub_checker/reporters/terminal.py +71 -0
sub_checker/services/__init__.py +0 -0
sub_checker/services/citation_verifier.py +331 -0
sub_checker/services/crossref.py +106 -0
sub_checker/services/http_client.py +159 -0
sub_checker/services/pubmed.py +106 -0
sub_checker/services/semantic_scholar.py +87 -0
sub_checker/services/web.py +124 -0
sub_checker/tools/__init__.py +0 -0
sub_checker/tools/filesystem_tools.py +63 -0
sub_checker/tools/manuscript_tools.py +239 -0
sub_checker/tools/pubmed_tools.py +132 -0
sub_checker/tools/web_tools.py +59 -0
sub_checker-0.1.0.dist-info/METADATA +193 -0
sub_checker-0.1.0.dist-info/RECORD +49 -0
sub_checker-0.1.0.dist-info/WHEEL +5 -0
sub_checker-0.1.0.dist-info/entry_points.txt +3 -0
sub_checker-0.1.0.dist-info/licenses/LICENSE +21 -0
sub_checker-0.1.0.dist-info/top_level.txt +1 -0

sub_checker/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""Sub-Checker: Pre-submission manuscript checker powered by Claude agents."""
+__version__ = "0.1.0"

sub_checker/agents/__init__.py ADDED Viewed

File without changes

sub_checker/agents/base.py ADDED Viewed

@@ -0,0 +1,448 @@
+from __future__ import annotations
+import copy
+import logging
+import time
+import uuid
+from abc import ABC, abstractmethod
+from datetime import UTC, datetime
+from functools import cached_property
+from pathlib import Path
+from typing import Any, cast
+import anthropic
+from anthropic.types import MessageParam, ToolParam
+from sub_checker.config import Config
+from sub_checker.logging_config import _DEFAULT_COT_DIR, AgentCOTLogger
+from sub_checker.models import (
+    CheckerResult,
+    Finding,
+    Manuscript,
+    Severity,
+    TokenUsage,
+)
+logger = logging.getLogger("sub_checker.agents")
+# Safety cap on the agentic loop: prevents runaway token spend if the model
+# never reaches end_turn. When the cap is exceeded, BaseCheckerAgent.run logs
+# a warning, records an INFO "incomplete" notice and returns the findings
+# collected so far.
+MAX_ITERATIONS = 30
+def supports_adaptive_thinking(model: str) -> bool:
+    """Tell whether *model* belongs to a family that offers adaptive thinking.
+    Covers Opus 4.6+ / Sonnet 4.6 / Fable 5. The decision is a plain,
+    case-sensitive substring match of known family markers against the
+    model identifier.
+    """
+    adaptive_markers = ("opus-4-6", "opus-4-7", "opus-4-8", "sonnet-4-6", "fable")
+    for marker in adaptive_markers:
+        if marker in model:
+            return True
+    return False
+def set_message_cache_breakpoint(messages: list[MessageParam]) -> None:
+    """Mark the last content block of the last message with cache_control.
+    Moves the single message-level cache breakpoint forward each iteration so
+    the entire conversation prefix is served from the prompt cache. Combined
+    with the system + tools breakpoints this stays within the 4-breakpoint
+    API limit.
+    ``messages`` is mutated in place. Only content blocks that are plain
+    dicts are touched; string content and non-dict blocks are left alone, so
+    no breakpoint is set when the last block is not a dict. An empty
+    ``messages`` list is a no-op.
+    """
+    if not messages:
+        # Nothing to mark; indexing messages[-1] below would raise IndexError.
+        return
+    # Drop any marker left by a previous iteration so at most one
+    # message-level breakpoint exists at a time.
+    for msg in messages:
+        content = msg.get("content")
+        if isinstance(content, list):
+            for block in content:
+                if isinstance(block, dict):
+                    block.pop("cache_control", None)
+    last_content = messages[-1].get("content")
+    if isinstance(last_content, list) and last_content and isinstance(last_content[-1], dict):
+        last_block = cast("dict[str, Any]", last_content[-1])
+        last_block["cache_control"] = {"type": "ephemeral"}
+class BaseCheckerAgent(ABC):
+    """Base class for all checker agents.
+    Each agent runs an agentic loop:
+    1. Send system prompt + tools + initial message to Claude
+    2. If Claude returns tool_use → execute tool → feed result back → repeat
+    3. If Claude returns text (done) → collect all add_finding calls → return CheckerResult
+    Subclasses set ``name`` and implement ``_default_system_prompt``,
+    ``get_tools`` and ``handle_tool_call``. Calls to the ``add_finding`` tool
+    are handled here and never reach ``handle_tool_call``.
+    """
+    # Identifier used for the prompt file name, log prefixes and the
+    # ``checker`` field of every Finding this agent records.
+    name: str = "base"
+    # Reasoning depth / token budget. Mechanical checkers (pattern matching,
+    # cross-referencing) override this down to save output tokens; judgment-
+    # heavy checkers keep "high". GA on Sonnet 4.6 and Opus 4.6+.
+    effort: str = "high"
+    def __init__(self, model: str = "claude-opus-4-8"):
+        """Store the model id and initialise empty per-run state."""
+        self.model = model
+        self._findings: list[Finding] = []
+        self._token_usage = TokenUsage()
+        self._manuscript: Manuscript | None = None
+    @cached_property
+    def system_prompt(self) -> str:
+        """System prompt text, resolved once and cached on the instance.
+        Read from ``prompts/<name>.txt`` next to this module when that file
+        exists; otherwise ``_default_system_prompt()`` supplies the text.
+        """
+        prompt_path = Path(__file__).parent / "prompts" / f"{self.name}.txt"
+        if prompt_path.exists():
+            # Decode explicitly: the locale default encoding (e.g. cp1252 on
+            # Windows) would garble or reject non-ASCII prompt text.
+            return prompt_path.read_text(encoding="utf-8")
+        return self._default_system_prompt()
+    @abstractmethod
+    def _default_system_prompt(self) -> str:
+        """Fallback system prompt if no .txt file exists."""
+        ...
+    @abstractmethod
+    def get_tools(self) -> list[dict]:
+        """Return tool definitions for this agent."""
+        ...
+    @abstractmethod
+    async def handle_tool_call(self, tool_name: str, tool_input: dict) -> str:
+        """Execute a tool and return the result as a string."""
+        ...
+    def _build_initial_message(self, manuscript: Manuscript, config: Config) -> str:
+        """Build the initial user message with task context.
+        The message carries today's UTC date, the manuscript title, the
+        target journal (or an explicit instruction not to assume one), the
+        section/paragraph counts and the add_finding field requirements.
+        When ``config.output_lang`` is "zh-TW" an instruction to write
+        findings in Traditional Chinese is appended.
+        """
+        today = datetime.now(UTC).strftime("%Y-%m-%d")
+        parts = [
+            f"Today's date: {today}",
+            f'Please check the following manuscript: "{manuscript.title}"',
+        ]
+        if config.journal:
+            parts.append(f"Target journal: {config.journal}")
+        else:
+            parts.append(
+                "Target journal: NOT SPECIFIED. "
+                "Do NOT assume any journal-specific requirements (formatting, "
+                "citation style, word limits). Only check internal consistency "
+                "within the manuscript itself."
+            )
+        parts.append(
+            f"The manuscript has {len(manuscript.sections)} sections "
+            f"and {len(manuscript.paragraphs)} paragraphs."
+        )
+        parts.append("Use the provided tools to read the manuscript and report any findings.")
+        parts.append(
+            "When calling add_finding, ALWAYS set claim_type (and claimed_date / "
+            "ref_number where applicable) — the validation harness uses these "
+            "fields to fact-check findings deterministically."
+        )
+        if config.output_lang == "zh-TW":
+            parts.append(
+                "\nIMPORTANT: Write ALL your findings (message, suggestion) in Traditional Chinese (繁體中文). "
+                "The manuscript itself is in English, but your output in add_finding must be in 繁體中文. "
+                "Example: message='引用 [15] 在文中被引用但參考文獻列表中缺失', "
+                "suggestion='請在參考文獻列表中新增 [15] 或修正引用編號'"
+            )
+        return "\n".join(parts)
+    def _handle_add_finding(self, tool_input: dict) -> str:
+        """Process an add_finding tool call.
+        Builds a Finding from the model-supplied ``tool_input``, appends it
+        to this run's findings and returns the confirmation text that is fed
+        back to the model. Malformed optional values are tolerated rather
+        than raised: this method runs outside the tool-error handler in
+        ``run``, so an exception here would abort the whole agent run.
+        """
+        # The model controls this payload; a null or non-string severity must
+        # not crash on .upper(). Unknown values fall back to WARNING.
+        raw_severity = tool_input.get("severity", "warning")
+        severity_str = raw_severity.upper() if isinstance(raw_severity, str) else "WARNING"
+        try:
+            severity = Severity[severity_str]
+        except KeyError:
+            severity = Severity.WARNING
+        ref_number = tool_input.get("ref_number")
+        # bool is a subclass of int; a JSON true/false is not a citation number.
+        if isinstance(ref_number, bool) or not isinstance(ref_number, int):
+            ref_number = None
+        finding = Finding(
+            checker=self.name,
+            severity=severity,
+            message=tool_input.get("message", ""),
+            location=tool_input.get("location"),
+            suggestion=tool_input.get("suggestion"),
+            context=tool_input.get("context"),
+            claim_type=tool_input.get("claim_type"),
+            claimed_date=tool_input.get("claimed_date"),
+            ref_number=ref_number,
+        )
+        self._findings.append(finding)
+        return f"Finding recorded: [{severity.value}] {finding.message}"
+    def _note_incomplete(self, message: str) -> None:
+        """Record that this check is incomplete.
+        validation_status="confirmed" keeps the note out of the harness/
+        reviewer (which could otherwise filter it as "not a manuscript
+        issue"), so the user always sees that coverage was partial.
+        """
+        self._findings.append(
+            Finding(
+                checker=self.name,
+                severity=Severity.INFO,
+                message=message,
+                claim_type="other",
+                validation_status="confirmed",
+                validation_note="[harness] incomplete-run notice, not reviewed",
+            )
+        )
+    async def run(self, manuscript: Manuscript, config: Config) -> CheckerResult:
+        """Execute the agent loop with full logging.
+        Resets the per-run state, then alternates API requests and tool
+        execution until the model ends its turn, a response yields no tool
+        results, or MAX_ITERATIONS is exceeded.
+        An ``anthropic.APIError`` is caught: findings collected so far are
+        kept and an incomplete-run notice is appended. Any other exception is
+        logged and re-raised. The COT log is saved in every case.
+        Returns a CheckerResult with the findings, elapsed seconds, token
+        usage, COT entries and the model id.
+        """
+        self._manuscript = manuscript
+        self._findings = []
+        self._token_usage = TokenUsage()
+        start = time.monotonic()
+        run_id = uuid.uuid4().hex[:8]
+        if config.cot_dir == "disabled":
+            cot_dir = None  # explicitly disable COT file output
+        elif config.cot_dir:
+            cot_dir = Path(config.cot_dir)  # custom directory
+        else:
+            cot_dir = _DEFAULT_COT_DIR  # None in config → use default
+        cot = AgentCOTLogger(
+            agent_name=self.name,
+            run_id=run_id,
+            cot_dir=cot_dir,
+        )
+        logger.info("Starting agent '%s' (run_id=%s)", self.name, run_id)
+        # Deep-copy: tool definitions are shared module constants and we add
+        # a cache_control marker to the last one (caches system + tools prefix).
+        tools = cast(list[ToolParam], copy.deepcopy(self.get_tools()))
+        if tools:
+            tools[-1]["cache_control"] = {"type": "ephemeral"}  # type: ignore[typeddict-unknown-key]
+        system_blocks = [
+            {
+                "type": "text",
+                "text": self.system_prompt,
+                "cache_control": {"type": "ephemeral"},
+            }
+        ]
+        messages = cast(
+            list[MessageParam],
+            [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": self._build_initial_message(manuscript, config),
+                        }
+                    ],
+                }
+            ],
+        )
+        iteration = 0
+        try:
+            async with anthropic.AsyncAnthropic() as client:
+                while True:
+                    iteration += 1
+                    if iteration > MAX_ITERATIONS:
+                        logger.warning(
+                            "[%s] Hit max iterations (%d), stopping agent loop",
+                            self.name,
+                            MAX_ITERATIONS,
+                        )
+                        self._note_incomplete(
+                            f"Check stopped after reaching the {MAX_ITERATIONS}-iteration "
+                            "safety cap — some items may not have been checked."
+                        )
+                        break
+                    logger.debug("[%s] Iteration %d: sending API request", self.name, iteration)
+                    cot.log_request(messages, tools)
+                    set_message_cache_breakpoint(messages)
+                    extra: dict[str, Any] = {}
+                    if supports_adaptive_thinking(self.model):
+                        # Adaptive thinking + per-checker effort: low effort
+                        # keeps thinking minimal (and avoids reasoning leaking
+                        # into the visible response on Opus 4.8).
+                        extra["thinking"] = {"type": "adaptive"}
+                        extra["output_config"] = {"effort": self.effort}
+                    response = await client.messages.create(
+                        model=self.model,
+                        max_tokens=16000,
+                        system=system_blocks,  # type: ignore[arg-type]
+                        tools=tools,
+                        messages=messages,
+                        **extra,
+                    )
+                    self._token_usage.input_tokens += response.usage.input_tokens
+                    self._token_usage.output_tokens += response.usage.output_tokens
+                    # The cache counters may be None on the response; count as 0.
+                    self._token_usage.cache_creation_input_tokens += (
+                        response.usage.cache_creation_input_tokens or 0
+                    )
+                    self._token_usage.cache_read_input_tokens += (
+                        response.usage.cache_read_input_tokens or 0
+                    )
+                    cot.log_response(str(response.stop_reason), response.content)
+                    if response.stop_reason == "end_turn":
+                        logger.debug("[%s] Agent finished (end_turn)", self.name)
+                        break
+                    truncated = response.stop_reason == "max_tokens"
+                    content_blocks = list(response.content)
+                    if truncated:
+                        logger.warning(
+                            "[%s] Response truncated at max_tokens on iteration %d",
+                            self.name,
+                            iteration,
+                        )
+                        # A trailing tool_use may carry incomplete input —
+                        # never execute a half-formed call (e.g. a cut-off
+                        # add_finding would record a garbage finding).
+                        if content_blocks:
+                            last = content_blocks[-1]
+                            if last.type == "tool_use":
+                                content_blocks.pop()
+                                logger.warning(
+                                    "[%s] Dropped truncated tool_use '%s'", self.name, last.name
+                                )
+                    # Process tool calls
+                    tool_results = []
+                    for block in content_blocks:
+                        if block.type == "tool_use":
+                            logger.debug(
+                                "[%s] Tool call: %s(%s)", self.name, block.name, block.input
+                            )
+                            if block.name == "add_finding":
+                                result = self._handle_add_finding(block.input)
+                                inp = block.input
+                                cot.log_finding(
+                                    str(inp.get("severity", "warning")),
+                                    str(inp.get("message", "")),
+                                )
+                            else:
+                                # A failing tool is reported back to the model
+                                # as text instead of aborting the run.
+                                try:
+                                    result = await self.handle_tool_call(block.name, block.input)
+                                except Exception as e:
+                                    result = f"Tool error: {e}"
+                                    logger.error(
+                                        "[%s] Tool '%s' failed: %s",
+                                        self.name,
+                                        block.name,
+                                        e,
+                                        exc_info=True,
+                                    )
+                                    cot.log_error(f"Tool '{block.name}' failed: {e}", e)
+                            cot.log_tool_result(block.name, block.id, result)
+                            tool_results.append(
+                                {"type": "tool_result", "tool_use_id": block.id, "content": result}
+                            )
+                    if not tool_results:
+                        if truncated:
+                            self._note_incomplete(
+                                "Check may be incomplete: the model response was "
+                                "truncated at the token limit before any tool call."
+                            )
+                        logger.debug("[%s] No tool results, ending loop", self.name)
+                        break
+                    # Echo the assistant turn (minus any dropped truncated
+                    # call) followed by the results of the calls it made.
+                    messages.append(
+                        cast(MessageParam, {"role": "assistant", "content": content_blocks})
+                    )
+                    messages.append(cast(MessageParam, {"role": "user", "content": tool_results}))
+        except anthropic.APIError as e:
+            # Keep findings already collected (paid for) instead of discarding
+            # the whole run; flag the result as incomplete.
+            logger.error("[%s] API error, returning partial result: %s", self.name, e)
+            cot.log_error(f"API error (partial result): {e}", e)
+            self._note_incomplete(
+                f"Check incomplete: the API failed after {iteration} iterations ({e}). "
+                f"{len(self._findings)} finding(s) collected before the failure are kept."
+            )
+        except Exception as e:
+            logger.error("[%s] Agent failed: %s", self.name, e, exc_info=True)
+            cot.log_error(f"Agent failed: {e}", e)
+            raise
+        finally:
+            # Runs on success, partial result and failure alike, so the COT
+            # log is always written and the timing is always reported.
+            elapsed = time.monotonic() - start
+            cot_path = cot.save()
+            logger.info(
+                "[%s] Completed in %.1fs (%d findings, %d iterations). COT: %s",
+                self.name,
+                elapsed,
+                len(self._findings),
+                iteration,
+                cot_path,
+            )
+        return CheckerResult(
+            checker_name=self.name,
+            findings=list(self._findings),
+            elapsed_seconds=elapsed,
+            token_usage=self._token_usage,
+            cot_entries=cot.entries,
+            model=self.model,
+        )
+# Shared tool definition for add_finding (all agents use this).
+# BaseCheckerAgent.run intercepts calls to this tool and routes them to
+# _handle_add_finding, so they never reach a subclass's handle_tool_call.
+# Only "severity" and "message" are required; claim_type, claimed_date and
+# ref_number are optional fields that the initial message asks the model to
+# set so the validation harness can fact-check findings deterministically.
+# This dict is a shared module constant: run() deep-copies tool definitions
+# before adding cache_control, so it is never mutated in place.
+ADD_FINDING_TOOL = {
+    "name": "add_finding",
+    "description": "Report a finding/issue found in the manuscript.",
+    "input_schema": {
+        "type": "object",
+        "properties": {
+            "severity": {
+                "type": "string",
+                "enum": ["error", "warning", "info"],
+                "description": "Severity: error (must fix), warning (should review), info (suggestion)",
+            },
+            "message": {
+                "type": "string",
+                "description": "Description of the issue found",
+            },
+            "location": {
+                "type": "string",
+                "description": "Where in the manuscript (e.g. 'Section: Methods, Paragraph 5')",
+            },
+            "suggestion": {
+                "type": "string",
+                "description": "How to fix the issue",
+            },
+            "context": {
+                "type": "string",
+                "description": "Surrounding text snippet for context",
+            },
+            "claim_type": {
+                "type": "string",
+                "enum": [
+                    "future_date",
+                    "uncited_reference",
+                    "missing_reference",
+                    "inconsistency",
+                    "other",
+                ],
+                "description": (
+                    "Machine-checkable claim category. Use 'future_date' when the finding "
+                    "claims a date is in the future, 'uncited_reference' when a reference "
+                    "list entry is claimed to never be cited in the text, "
+                    "'missing_reference' when a citation number is claimed to be absent "
+                    "from the reference list, 'inconsistency' for format/style "
+                    "inconsistency claims, 'other' otherwise. ALWAYS set this field — it "
+                    "lets the validation harness fact-check the finding deterministically."
+                ),
+            },
+            "claimed_date": {
+                "type": "string",
+                "description": (
+                    "For claim_type='future_date': the date the claim is about, "
+                    "as 'YYYY' or 'YYYY-MM' (e.g. '2025-11')"
+                ),
+            },
+            "ref_number": {
+                "type": "integer",
+                "description": (
+                    "For citation-related claims: the reference/citation number "
+                    "the finding concerns (e.g. 23 for reference [23])"
+                ),
+            },
+        },
+        "required": ["severity", "message"],
+    },
+}

sub_checker/agents/citation_claim.py ADDED Viewed

@@ -0,0 +1,142 @@
+from __future__ import annotations
+import logging
+from sub_checker.agents.base import ADD_FINDING_TOOL, BaseCheckerAgent
+from sub_checker.config import Config
+from sub_checker.models import Manuscript
+from sub_checker.services.citation_verifier import (
+    format_verification_report,
+    verify_references,
+)
+from sub_checker.services.crossref import CrossrefClient
+from sub_checker.services.pubmed import PubMedClient
+from sub_checker.services.semantic_scholar import SemanticScholarClient
+from sub_checker.tools.manuscript_tools import (
+    TOOL_GET_REFERENCE_LIST,
+    TOOL_READ_SECTION,
+    get_reference_list,
+    read_section,
+    reference_entries,
+)
+from sub_checker.tools.pubmed_tools import (
+    TOOL_GET_ABSTRACT,
+    TOOL_SEARCH_LITERATURE,
+    get_abstract,
+    search_literature,
+)
+logger = logging.getLogger("sub_checker.agents.citation_claim")
+class CitationClaimAgent(BaseCheckerAgent):
+    """Checker that verifies each citation's claim against the cited paper.
+    Before the agent loop starts, ``run`` performs a multi-source
+    verification pre-pass (PubMed, Semantic Scholar, Crossref) over the
+    manuscript's reference entries and injects the resulting report into the
+    initial message. During the loop the model can read sections, list
+    references, search literature and fetch abstracts.
+    """
+    name = "citation_claim"
+    def __init__(self, model: str = "claude-opus-4-8"):
+        """Create the agent; service clients are opened per run, not here."""
+        super().__init__(model=model)
+        self._pubmed: PubMedClient | None = None
+        self._s2: SemanticScholarClient | None = None
+        self._verification_report: str = ""
+    def _default_system_prompt(self) -> str:
+        """Fallback system prompt describing the verification workflow."""
+        return (
+            "You are a citation verification expert. Your job is to verify that EVERY "
+            "citation in the manuscript is supported by the actual referenced paper.\n\n"
+            "## Pre-Verification Report\n\n"
+            "A MULTI-SOURCE VERIFICATION REPORT is provided in your initial message.\n"
+            "This report was generated by querying PubMed, Semantic Scholar, AND Crossref\n"
+            "in parallel, then cross-validating results. Each reference has a confidence\n"
+            "score and verification status.\n\n"
+            "TRUST this report for reference existence. Focus your effort on:\n"
+            "- Verifying that claims in the text are actually supported by the cited papers\n"
+            "- For 'verified' references: use get_abstract to compare claim vs abstract\n"
+            "- For 'not_found' references: note this in your finding (may be very recent)\n\n"
+            "## Workflow\n\n"
+            "1. Review the pre-verification report for reference existence/validity\n"
+            "2. Read the reference list and each manuscript section\n"
+            "3. For EACH citation:\n"
+            "   a. Identify the CLAIM being made\n"
+            "   b. Check the pre-verification status\n"
+            "   c. For verified/likely_valid refs: use get_abstract to check claim support\n"
+            "   d. Report your verdict via add_finding\n\n"
+            "## Verdict Categories\n\n"
+            "- **SUPPORTS** (severity=info): Abstract clearly supports the claim\n"
+            "- **PARTIALLY_SUPPORTS** (severity=info): Related but doesn't fully address\n"
+            "- **CONTRADICTS** (severity=error): Abstract contradicts the claim\n"
+            "- **INSUFFICIENT** (severity=warning): Not enough info to verify\n"
+            "- **NOT_FOUND** (severity=warning): Paper not found (check pre-verification)\n"
+            "- **NO_ABSTRACT** (severity=warning): Paper found but no abstract available\n\n"
+            "## Important Rules\n\n"
+            "- Verify EVERY citation systematically\n"
+            "- search_literature searches PubMed first, then Semantic Scholar as fallback\n"
+            "- Include citation number, claim text, and verdict in each finding\n"
+            "- For self-citations or unpublished: report as info\n"
+        )
+    def _build_initial_message(self, manuscript: Manuscript, config: Config) -> str:
+        """Override to inject multi-source verification report.
+        The report is appended after the base message, separated by a blank
+        line; when no report was produced the base message is returned as is.
+        """
+        base_msg = super()._build_initial_message(manuscript, config)
+        if self._verification_report:
+            return base_msg + "\n\n" + self._verification_report
+        return base_msg
+    def get_tools(self) -> list[dict]:
+        """Return the manuscript, literature and add_finding tool definitions."""
+        return [
+            TOOL_READ_SECTION,
+            TOOL_GET_REFERENCE_LIST,
+            TOOL_SEARCH_LITERATURE,
+            TOOL_GET_ABSTRACT,
+            ADD_FINDING_TOOL,
+        ]
+    async def handle_tool_call(self, tool_name: str, tool_input: dict) -> str:
+        """Dispatch a tool call from the model and return its text result.
+        Unknown tool names yield an "Unknown tool" message rather than an
+        error. Raises RuntimeError when called before ``run`` has set the
+        manuscript or opened the literature clients; a missing required key
+        in ``tool_input`` surfaces as KeyError.
+        """
+        ms = self._manuscript
+        # Explicit checks instead of assert: asserts are stripped under -O.
+        if ms is None:
+            raise RuntimeError("handle_tool_call called before run() set the manuscript")
+        if tool_name == "read_section":
+            return read_section(ms, tool_input["section_name"])
+        if tool_name == "get_reference_list":
+            return get_reference_list(ms)
+        if tool_name in ("search_literature", "get_abstract"):
+            pubmed, s2 = self._pubmed, self._s2
+            if pubmed is None or s2 is None:
+                raise RuntimeError(
+                    f"{tool_name} called before run() opened the literature clients"
+                )
+            if tool_name == "search_literature":
+                return await search_literature(
+                    pubmed,
+                    s2,
+                    tool_input["author"],
+                    tool_input["year"],
+                    tool_input.get("title_keywords", ""),
+                )
+            return await get_abstract(pubmed, s2, tool_input["paper_id"])
+        return f"Unknown tool: {tool_name}"
+    async def run(self, manuscript: Manuscript, config: Config):
+        """Run the verification pre-pass, then the base agent loop.
+        Opens PubMed, Semantic Scholar and Crossref clients for the duration
+        of the run, and closes all of them afterwards. Returns whatever the
+        base class ``run`` returns.
+        """
+        # Reset per-run state: otherwise a reused agent would inject the
+        # previous manuscript's report when the new one has no references.
+        self._verification_report = ""
+        self._pubmed = PubMedClient(
+            email=config.claim.pubmed_email,
+            api_key=config.claim.pubmed_api_key,
+            max_concurrent=config.claim.max_concurrent_pubmed,
+        )
+        self._s2 = SemanticScholarClient(max_concurrent=3)
+        crossref = CrossrefClient(max_concurrent=3, mailto=config.claim.pubmed_email)
+        clients = (self._pubmed, self._s2, crossref)
+        try:
+            # Run multi-source verification as harness pre-pass. Use
+            # reconstructed entries, not raw lines — verifying table captions
+            # and wrapped fragments wastes 3 API calls each and pollutes the
+            # report with bogus NOT_FOUND rows.
+            ref_lines = reference_entries(manuscript.reference_section)
+            if ref_lines:
+                logger.info(
+                    "Running multi-source verification for %d references...", len(ref_lines)
+                )
+                verified = await verify_references(ref_lines, self._pubmed, self._s2, crossref)
+                self._verification_report = format_verification_report(verified)
+                logger.info("Verification complete: %s", self._verification_report[:200])
+            return await super().run(manuscript, config)
+        finally:
+            # Close every client even when one close() fails; a cleanup
+            # error is logged so it cannot leak the remaining clients or
+            # mask the result/exception of the run itself.
+            for client in clients:
+                try:
+                    await client.close()
+                except Exception:
+                    logger.warning(
+                        "Failed to close %s", type(client).__name__, exc_info=True
+                    )