sub-checker 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. sub_checker/__init__.py +3 -0
  2. sub_checker/agents/__init__.py +0 -0
  3. sub_checker/agents/base.py +448 -0
  4. sub_checker/agents/citation_claim.py +142 -0
  5. sub_checker/agents/citation_exist.py +100 -0
  6. sub_checker/agents/citation_format.py +94 -0
  7. sub_checker/agents/figure_table.py +62 -0
  8. sub_checker/agents/journal_guidelines.py +110 -0
  9. sub_checker/agents/logic.py +45 -0
  10. sub_checker/agents/typo_grammar.py +58 -0
  11. sub_checker/api.py +239 -0
  12. sub_checker/cli.py +195 -0
  13. sub_checker/config.py +113 -0
  14. sub_checker/env.py +23 -0
  15. sub_checker/eval_runner.py +319 -0
  16. sub_checker/harness/__init__.py +0 -0
  17. sub_checker/harness/dedup.py +86 -0
  18. sub_checker/harness/deterministic.py +284 -0
  19. sub_checker/harness/reviewer.py +409 -0
  20. sub_checker/i18n.py +98 -0
  21. sub_checker/logging_config.py +175 -0
  22. sub_checker/models.py +98 -0
  23. sub_checker/orchestrator.py +278 -0
  24. sub_checker/parsers/__init__.py +0 -0
  25. sub_checker/parsers/docx_parser.py +185 -0
  26. sub_checker/pipeline.py +73 -0
  27. sub_checker/reporters/__init__.py +0 -0
  28. sub_checker/reporters/html_reporter.py +531 -0
  29. sub_checker/reporters/json_reporter.py +55 -0
  30. sub_checker/reporters/markdown_reporter.py +60 -0
  31. sub_checker/reporters/terminal.py +71 -0
  32. sub_checker/services/__init__.py +0 -0
  33. sub_checker/services/citation_verifier.py +331 -0
  34. sub_checker/services/crossref.py +106 -0
  35. sub_checker/services/http_client.py +159 -0
  36. sub_checker/services/pubmed.py +106 -0
  37. sub_checker/services/semantic_scholar.py +87 -0
  38. sub_checker/services/web.py +124 -0
  39. sub_checker/tools/__init__.py +0 -0
  40. sub_checker/tools/filesystem_tools.py +63 -0
  41. sub_checker/tools/manuscript_tools.py +239 -0
  42. sub_checker/tools/pubmed_tools.py +132 -0
  43. sub_checker/tools/web_tools.py +59 -0
  44. sub_checker-0.1.0.dist-info/METADATA +193 -0
  45. sub_checker-0.1.0.dist-info/RECORD +49 -0
  46. sub_checker-0.1.0.dist-info/WHEEL +5 -0
  47. sub_checker-0.1.0.dist-info/entry_points.txt +3 -0
  48. sub_checker-0.1.0.dist-info/licenses/LICENSE +21 -0
  49. sub_checker-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,3 @@
1
+ """Sub-Checker: Pre-submission manuscript checker powered by Claude agents."""
2
+
3
# Package version string; the distribution metadata in this wheel is named
# sub_checker-0.1.0.dist-info, so the two must be kept in sync.
__version__ = "0.1.0"
File without changes
@@ -0,0 +1,448 @@
1
+ from __future__ import annotations
2
+
3
+ import copy
4
+ import logging
5
+ import time
6
+ import uuid
7
+ from abc import ABC, abstractmethod
8
+ from datetime import UTC, datetime
9
+ from functools import cached_property
10
+ from pathlib import Path
11
+ from typing import Any, cast
12
+
13
+ import anthropic
14
+ from anthropic.types import MessageParam, ToolParam
15
+
16
+ from sub_checker.config import Config
17
+ from sub_checker.logging_config import _DEFAULT_COT_DIR, AgentCOTLogger
18
+ from sub_checker.models import (
19
+ CheckerResult,
20
+ Finding,
21
+ Manuscript,
22
+ Severity,
23
+ TokenUsage,
24
+ )
25
+
26
# Shared logger for the agent framework; uses the fixed "sub_checker.agents"
# channel rather than __name__.
logger = logging.getLogger("sub_checker.agents")

# Safety cap on the agentic loop: prevents runaway token spend if the model
# never reaches end_turn. BaseCheckerAgent.run() stops and records an
# "incomplete" notice once the iteration counter exceeds this value.
MAX_ITERATIONS = 30
31
+
32
+
33
def supports_adaptive_thinking(model: str) -> bool:
    """Report whether *model* is one of the adaptive-thinking model families.

    The check is a plain substring match of the model identifier against a
    fixed list of family markers (Opus 4.6 / 4.7 / 4.8, Sonnet 4.6, Fable).

    Args:
        model: Model identifier string, e.g. ``"claude-opus-4-8"``.

    Returns:
        True if any known marker occurs anywhere in ``model``, else False.
    """
    adaptive_markers = ("opus-4-6", "opus-4-7", "opus-4-8", "sonnet-4-6", "fable")
    for candidate in adaptive_markers:
        if candidate in model:
            return True
    return False
39
+
40
+
41
def set_message_cache_breakpoint(messages: list[MessageParam]) -> None:
    """Mark the last content block of the last message with cache_control.

    Moves the single message-level cache breakpoint forward each iteration so
    the entire conversation prefix is served from the prompt cache. Combined
    with the system + tools breakpoints this stays within the 4-breakpoint
    API limit.

    The list is mutated in place. Only content blocks that are plain dicts
    are touched; string content and non-dict block objects are left alone,
    so the marker is only placed when the final message ends in a dict block.

    Args:
        messages: Conversation so far. An empty list is accepted and is a
            no-op.
    """
    # Nothing to mark (and messages[-1] would raise) on an empty conversation.
    if not messages:
        return

    # Clear every earlier marker so exactly one message-level breakpoint exists.
    for msg in messages:
        content = msg.get("content")
        if isinstance(content, list):
            for block in content:
                if isinstance(block, dict):
                    block.pop("cache_control", None)

    last_content = messages[-1].get("content")
    if isinstance(last_content, list) and last_content and isinstance(last_content[-1], dict):
        last_block = cast("dict[str, Any]", last_content[-1])
        last_block["cache_control"] = {"type": "ephemeral"}
60
+
61
+
62
+ class BaseCheckerAgent(ABC):
63
+ """Base class for all checker agents.
64
+
65
+ Each agent runs an agentic loop:
66
+ 1. Send system prompt + tools + initial message to Claude
67
+ 2. If Claude returns tool_use → execute tool → feed result back → repeat
68
+ 3. If Claude returns text (done) → collect all add_finding calls → return CheckerResult
69
+ """
70
+
71
+ name: str = "base"
72
+ # Reasoning depth / token budget. Mechanical checkers (pattern matching,
73
+ # cross-referencing) override this down to save output tokens; judgment-
74
+ # heavy checkers keep "high". GA on Sonnet 4.6 and Opus 4.6+.
75
+ effort: str = "high"
76
+
77
+ def __init__(self, model: str = "claude-opus-4-8"):
78
+ self.model = model
79
+ self._findings: list[Finding] = []
80
+ self._token_usage = TokenUsage()
81
+ self._manuscript: Manuscript | None = None
82
+
83
+ @cached_property
84
+ def system_prompt(self) -> str:
85
+ prompt_path = Path(__file__).parent / "prompts" / f"{self.name}.txt"
86
+ if prompt_path.exists():
87
+ return prompt_path.read_text()
88
+ return self._default_system_prompt()
89
+
90
+ @abstractmethod
91
+ def _default_system_prompt(self) -> str:
92
+ """Fallback system prompt if no .txt file exists."""
93
+ ...
94
+
95
+ @abstractmethod
96
+ def get_tools(self) -> list[dict]:
97
+ """Return tool definitions for this agent."""
98
+ ...
99
+
100
+ @abstractmethod
101
+ async def handle_tool_call(self, tool_name: str, tool_input: dict) -> str:
102
+ """Execute a tool and return the result as a string."""
103
+ ...
104
+
105
+ def _build_initial_message(self, manuscript: Manuscript, config: Config) -> str:
106
+ """Build the initial user message with task context."""
107
+ today = datetime.now(UTC).strftime("%Y-%m-%d")
108
+ parts = [
109
+ f"Today's date: {today}",
110
+ f'Please check the following manuscript: "{manuscript.title}"',
111
+ ]
112
+ if config.journal:
113
+ parts.append(f"Target journal: {config.journal}")
114
+ else:
115
+ parts.append(
116
+ "Target journal: NOT SPECIFIED. "
117
+ "Do NOT assume any journal-specific requirements (formatting, "
118
+ "citation style, word limits). Only check internal consistency "
119
+ "within the manuscript itself."
120
+ )
121
+ parts.append(
122
+ f"The manuscript has {len(manuscript.sections)} sections "
123
+ f"and {len(manuscript.paragraphs)} paragraphs."
124
+ )
125
+ parts.append("Use the provided tools to read the manuscript and report any findings.")
126
+ parts.append(
127
+ "When calling add_finding, ALWAYS set claim_type (and claimed_date / "
128
+ "ref_number where applicable) — the validation harness uses these "
129
+ "fields to fact-check findings deterministically."
130
+ )
131
+ if config.output_lang == "zh-TW":
132
+ parts.append(
133
+ "\nIMPORTANT: Write ALL your findings (message, suggestion) in Traditional Chinese (繁體中文). "
134
+ "The manuscript itself is in English, but your output in add_finding must be in 繁體中文. "
135
+ "Example: message='引用 [15] 在文中被引用但參考文獻列表中缺失', "
136
+ "suggestion='請在參考文獻列表中新增 [15] 或修正引用編號'"
137
+ )
138
+ return "\n".join(parts)
139
+
140
+ def _handle_add_finding(self, tool_input: dict) -> str:
141
+ """Process an add_finding tool call."""
142
+ severity_str = tool_input.get("severity", "warning").upper()
143
+ try:
144
+ severity = Severity[severity_str]
145
+ except KeyError:
146
+ severity = Severity.WARNING
147
+
148
+ ref_number = tool_input.get("ref_number")
149
+ if not isinstance(ref_number, int):
150
+ ref_number = None
151
+
152
+ finding = Finding(
153
+ checker=self.name,
154
+ severity=severity,
155
+ message=tool_input.get("message", ""),
156
+ location=tool_input.get("location"),
157
+ suggestion=tool_input.get("suggestion"),
158
+ context=tool_input.get("context"),
159
+ claim_type=tool_input.get("claim_type"),
160
+ claimed_date=tool_input.get("claimed_date"),
161
+ ref_number=ref_number,
162
+ )
163
+ self._findings.append(finding)
164
+ return f"Finding recorded: [{severity.value}] {finding.message}"
165
+
166
+ def _note_incomplete(self, message: str) -> None:
167
+ """Record that this check is incomplete.
168
+
169
+ validation_status="confirmed" keeps the note out of the harness/
170
+ reviewer (which could otherwise filter it as "not a manuscript
171
+ issue"), so the user always sees that coverage was partial.
172
+ """
173
+ self._findings.append(
174
+ Finding(
175
+ checker=self.name,
176
+ severity=Severity.INFO,
177
+ message=message,
178
+ claim_type="other",
179
+ validation_status="confirmed",
180
+ validation_note="[harness] incomplete-run notice, not reviewed",
181
+ )
182
+ )
183
+
184
+ async def run(self, manuscript: Manuscript, config: Config) -> CheckerResult:
185
+ """Execute the agent loop with full logging."""
186
+ self._manuscript = manuscript
187
+ self._findings = []
188
+ self._token_usage = TokenUsage()
189
+ start = time.monotonic()
190
+
191
+ run_id = uuid.uuid4().hex[:8]
192
+ if config.cot_dir == "disabled":
193
+ cot_dir = None # explicitly disable COT file output
194
+ elif config.cot_dir:
195
+ cot_dir = Path(config.cot_dir) # custom directory
196
+ else:
197
+ cot_dir = _DEFAULT_COT_DIR # None in config → use default
198
+ cot = AgentCOTLogger(
199
+ agent_name=self.name,
200
+ run_id=run_id,
201
+ cot_dir=cot_dir,
202
+ )
203
+ logger.info("Starting agent '%s' (run_id=%s)", self.name, run_id)
204
+
205
+ # Deep-copy: tool definitions are shared module constants and we add
206
+ # a cache_control marker to the last one (caches system + tools prefix).
207
+ tools = cast(list[ToolParam], copy.deepcopy(self.get_tools()))
208
+ if tools:
209
+ tools[-1]["cache_control"] = {"type": "ephemeral"} # type: ignore[typeddict-unknown-key]
210
+ system_blocks = [
211
+ {
212
+ "type": "text",
213
+ "text": self.system_prompt,
214
+ "cache_control": {"type": "ephemeral"},
215
+ }
216
+ ]
217
+ messages = cast(
218
+ list[MessageParam],
219
+ [
220
+ {
221
+ "role": "user",
222
+ "content": [
223
+ {
224
+ "type": "text",
225
+ "text": self._build_initial_message(manuscript, config),
226
+ }
227
+ ],
228
+ }
229
+ ],
230
+ )
231
+
232
+ iteration = 0
233
+ try:
234
+ async with anthropic.AsyncAnthropic() as client:
235
+ while True:
236
+ iteration += 1
237
+ if iteration > MAX_ITERATIONS:
238
+ logger.warning(
239
+ "[%s] Hit max iterations (%d), stopping agent loop",
240
+ self.name,
241
+ MAX_ITERATIONS,
242
+ )
243
+ self._note_incomplete(
244
+ f"Check stopped after reaching the {MAX_ITERATIONS}-iteration "
245
+ "safety cap — some items may not have been checked."
246
+ )
247
+ break
248
+ logger.debug("[%s] Iteration %d: sending API request", self.name, iteration)
249
+ cot.log_request(messages, tools)
250
+ set_message_cache_breakpoint(messages)
251
+
252
+ extra: dict[str, Any] = {}
253
+ if supports_adaptive_thinking(self.model):
254
+ # Adaptive thinking + per-checker effort: low effort
255
+ # keeps thinking minimal (and avoids reasoning leaking
256
+ # into the visible response on Opus 4.8).
257
+ extra["thinking"] = {"type": "adaptive"}
258
+ extra["output_config"] = {"effort": self.effort}
259
+ response = await client.messages.create(
260
+ model=self.model,
261
+ max_tokens=16000,
262
+ system=system_blocks, # type: ignore[arg-type]
263
+ tools=tools,
264
+ messages=messages,
265
+ **extra,
266
+ )
267
+
268
+ self._token_usage.input_tokens += response.usage.input_tokens
269
+ self._token_usage.output_tokens += response.usage.output_tokens
270
+ self._token_usage.cache_creation_input_tokens += (
271
+ response.usage.cache_creation_input_tokens or 0
272
+ )
273
+ self._token_usage.cache_read_input_tokens += (
274
+ response.usage.cache_read_input_tokens or 0
275
+ )
276
+ cot.log_response(str(response.stop_reason), response.content)
277
+
278
+ if response.stop_reason == "end_turn":
279
+ logger.debug("[%s] Agent finished (end_turn)", self.name)
280
+ break
281
+
282
+ truncated = response.stop_reason == "max_tokens"
283
+ content_blocks = list(response.content)
284
+ if truncated:
285
+ logger.warning(
286
+ "[%s] Response truncated at max_tokens on iteration %d",
287
+ self.name,
288
+ iteration,
289
+ )
290
+ # A trailing tool_use may carry incomplete input —
291
+ # never execute a half-formed call (e.g. a cut-off
292
+ # add_finding would record a garbage finding).
293
+ if content_blocks:
294
+ last = content_blocks[-1]
295
+ if last.type == "tool_use":
296
+ content_blocks.pop()
297
+ logger.warning(
298
+ "[%s] Dropped truncated tool_use '%s'", self.name, last.name
299
+ )
300
+
301
+ # Process tool calls
302
+ tool_results = []
303
+ for block in content_blocks:
304
+ if block.type == "tool_use":
305
+ logger.debug(
306
+ "[%s] Tool call: %s(%s)", self.name, block.name, block.input
307
+ )
308
+
309
+ if block.name == "add_finding":
310
+ result = self._handle_add_finding(block.input)
311
+ inp = block.input
312
+ cot.log_finding(
313
+ str(inp.get("severity", "warning")),
314
+ str(inp.get("message", "")),
315
+ )
316
+ else:
317
+ try:
318
+ result = await self.handle_tool_call(block.name, block.input)
319
+ except Exception as e:
320
+ result = f"Tool error: {e}"
321
+ logger.error(
322
+ "[%s] Tool '%s' failed: %s",
323
+ self.name,
324
+ block.name,
325
+ e,
326
+ exc_info=True,
327
+ )
328
+ cot.log_error(f"Tool '{block.name}' failed: {e}", e)
329
+
330
+ cot.log_tool_result(block.name, block.id, result)
331
+ tool_results.append(
332
+ {"type": "tool_result", "tool_use_id": block.id, "content": result}
333
+ )
334
+
335
+ if not tool_results:
336
+ if truncated:
337
+ self._note_incomplete(
338
+ "Check may be incomplete: the model response was "
339
+ "truncated at the token limit before any tool call."
340
+ )
341
+ logger.debug("[%s] No tool results, ending loop", self.name)
342
+ break
343
+
344
+ messages.append(
345
+ cast(MessageParam, {"role": "assistant", "content": content_blocks})
346
+ )
347
+ messages.append(cast(MessageParam, {"role": "user", "content": tool_results}))
348
+
349
+ except anthropic.APIError as e:
350
+ # Keep findings already collected (paid for) instead of discarding
351
+ # the whole run; flag the result as incomplete.
352
+ logger.error("[%s] API error, returning partial result: %s", self.name, e)
353
+ cot.log_error(f"API error (partial result): {e}", e)
354
+ self._note_incomplete(
355
+ f"Check incomplete: the API failed after {iteration} iterations ({e}). "
356
+ f"{len(self._findings)} finding(s) collected before the failure are kept."
357
+ )
358
+ except Exception as e:
359
+ logger.error("[%s] Agent failed: %s", self.name, e, exc_info=True)
360
+ cot.log_error(f"Agent failed: {e}", e)
361
+ raise
362
+ finally:
363
+ elapsed = time.monotonic() - start
364
+ cot_path = cot.save()
365
+ logger.info(
366
+ "[%s] Completed in %.1fs (%d findings, %d iterations). COT: %s",
367
+ self.name,
368
+ elapsed,
369
+ len(self._findings),
370
+ iteration,
371
+ cot_path,
372
+ )
373
+
374
+ return CheckerResult(
375
+ checker_name=self.name,
376
+ findings=list(self._findings),
377
+ elapsed_seconds=elapsed,
378
+ token_usage=self._token_usage,
379
+ cot_entries=cot.entries,
380
+ model=self.model,
381
+ )
382
+
383
+
384
# JSON-schema properties accepted by the shared ``add_finding`` tool. Kept as
# a separate constant so the tool definition below stays short; key order is
# part of the serialized tool definition and is deliberately preserved.
_ADD_FINDING_PROPERTIES = {
    "severity": {
        "type": "string",
        "enum": ["error", "warning", "info"],
        "description": "Severity: error (must fix), warning (should review), info (suggestion)",
    },
    "message": {"type": "string", "description": "Description of the issue found"},
    "location": {
        "type": "string",
        "description": "Where in the manuscript (e.g. 'Section: Methods, Paragraph 5')",
    },
    "suggestion": {"type": "string", "description": "How to fix the issue"},
    "context": {"type": "string", "description": "Surrounding text snippet for context"},
    # Machine-checkable category: the validation harness keys off this field.
    "claim_type": {
        "type": "string",
        "enum": [
            "future_date",
            "uncited_reference",
            "missing_reference",
            "inconsistency",
            "other",
        ],
        "description": (
            "Machine-checkable claim category. Use 'future_date' when the finding "
            "claims a date is in the future, 'uncited_reference' when a reference "
            "list entry is claimed to never be cited in the text, "
            "'missing_reference' when a citation number is claimed to be absent "
            "from the reference list, 'inconsistency' for format/style "
            "inconsistency claims, 'other' otherwise. ALWAYS set this field — it "
            "lets the validation harness fact-check the finding deterministically."
        ),
    },
    "claimed_date": {
        "type": "string",
        "description": (
            "For claim_type='future_date': the date the claim is about, "
            "as 'YYYY' or 'YYYY-MM' (e.g. '2025-11')"
        ),
    },
    "ref_number": {
        "type": "integer",
        "description": (
            "For citation-related claims: the reference/citation number "
            "the finding concerns (e.g. 23 for reference [23])"
        ),
    },
}

# Tool definition for add_finding, shared by every checker agent. Only
# severity and message are mandatory; the remaining fields are optional.
ADD_FINDING_TOOL = {
    "name": "add_finding",
    "description": "Report a finding/issue found in the manuscript.",
    "input_schema": {
        "type": "object",
        "properties": _ADD_FINDING_PROPERTIES,
        "required": ["severity", "message"],
    },
}
@@ -0,0 +1,142 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+
5
+ from sub_checker.agents.base import ADD_FINDING_TOOL, BaseCheckerAgent
6
+ from sub_checker.config import Config
7
+ from sub_checker.models import Manuscript
8
+ from sub_checker.services.citation_verifier import (
9
+ format_verification_report,
10
+ verify_references,
11
+ )
12
+ from sub_checker.services.crossref import CrossrefClient
13
+ from sub_checker.services.pubmed import PubMedClient
14
+ from sub_checker.services.semantic_scholar import SemanticScholarClient
15
+ from sub_checker.tools.manuscript_tools import (
16
+ TOOL_GET_REFERENCE_LIST,
17
+ TOOL_READ_SECTION,
18
+ get_reference_list,
19
+ read_section,
20
+ reference_entries,
21
+ )
22
+ from sub_checker.tools.pubmed_tools import (
23
+ TOOL_GET_ABSTRACT,
24
+ TOOL_SEARCH_LITERATURE,
25
+ get_abstract,
26
+ search_literature,
27
+ )
28
+
29
# Module logger on the "sub_checker.agents.citation_claim" channel; used for
# the pre-verification progress messages in CitationClaimAgent.run().
logger = logging.getLogger("sub_checker.agents.citation_claim")
30
+
31
+
32
class CitationClaimAgent(BaseCheckerAgent):
    """Checker agent that verifies manuscript claims against the cited papers.

    Before the agent loop starts, ``run()`` verifies the reference list
    against PubMed, Semantic Scholar and Crossref and appends the resulting
    report to the initial user message. During the loop the model can read
    manuscript sections, read the reference list, search the literature and
    fetch abstracts.
    """

    name = "citation_claim"

    def __init__(self, model: str = "claude-opus-4-8"):
        """Create the agent; the literature clients are built per run."""
        super().__init__(model=model)
        self._pubmed: PubMedClient | None = None
        self._s2: SemanticScholarClient | None = None
        self._verification_report: str = ""

    def _default_system_prompt(self) -> str:
        """Return the built-in system prompt for claim verification."""
        return (
            "You are a citation verification expert. Your job is to verify that EVERY "
            "citation in the manuscript is supported by the actual referenced paper.\n\n"
            "## Pre-Verification Report\n\n"
            "A MULTI-SOURCE VERIFICATION REPORT is provided in your initial message.\n"
            "This report was generated by querying PubMed, Semantic Scholar, AND Crossref\n"
            "in parallel, then cross-validating results. Each reference has a confidence\n"
            "score and verification status.\n\n"
            "TRUST this report for reference existence. Focus your effort on:\n"
            "- Verifying that claims in the text are actually supported by the cited papers\n"
            "- For 'verified' references: use get_abstract to compare claim vs abstract\n"
            "- For 'not_found' references: note this in your finding (may be very recent)\n\n"
            "## Workflow\n\n"
            "1. Review the pre-verification report for reference existence/validity\n"
            "2. Read the reference list and each manuscript section\n"
            "3. For EACH citation:\n"
            " a. Identify the CLAIM being made\n"
            " b. Check the pre-verification status\n"
            " c. For verified/likely_valid refs: use get_abstract to check claim support\n"
            " d. Report your verdict via add_finding\n\n"
            "## Verdict Categories\n\n"
            "- **SUPPORTS** (severity=info): Abstract clearly supports the claim\n"
            "- **PARTIALLY_SUPPORTS** (severity=info): Related but doesn't fully address\n"
            "- **CONTRADICTS** (severity=error): Abstract contradicts the claim\n"
            "- **INSUFFICIENT** (severity=warning): Not enough info to verify\n"
            "- **NOT_FOUND** (severity=warning): Paper not found (check pre-verification)\n"
            "- **NO_ABSTRACT** (severity=warning): Paper found but no abstract available\n\n"
            "## Important Rules\n\n"
            "- Verify EVERY citation systematically\n"
            "- search_literature searches PubMed first, then Semantic Scholar as fallback\n"
            "- Include citation number, claim text, and verdict in each finding\n"
            "- For self-citations or unpublished: report as info\n"
        )

    def _build_initial_message(self, manuscript: Manuscript, config: Config) -> str:
        """Override to inject multi-source verification report.

        Returns the base message, followed by a blank line and the report
        when one was produced for the current run.
        """
        base_msg = super()._build_initial_message(manuscript, config)
        if self._verification_report:
            return base_msg + "\n\n" + self._verification_report
        return base_msg

    def get_tools(self) -> list[dict]:
        """Return the manuscript, literature and add_finding tool definitions."""
        return [
            TOOL_READ_SECTION,
            TOOL_GET_REFERENCE_LIST,
            TOOL_SEARCH_LITERATURE,
            TOOL_GET_ABSTRACT,
            ADD_FINDING_TOOL,
        ]

    async def handle_tool_call(self, tool_name: str, tool_input: dict) -> str:
        """Dispatch a model tool call to the matching helper.

        Args:
            tool_name: Name of the tool the model invoked.
            tool_input: Tool arguments; required keys are read directly, so a
                missing one raises ``KeyError``.

        Returns:
            The tool's text result, or ``"Unknown tool: <name>"`` for a name
            this agent does not handle.
        """
        ms = self._manuscript
        assert ms is not None
        if tool_name == "read_section":
            return read_section(ms, tool_input["section_name"])
        if tool_name == "get_reference_list":
            return get_reference_list(ms)
        if tool_name == "search_literature":
            # The clients only exist while run() is executing.
            assert self._pubmed is not None
            assert self._s2 is not None
            return await search_literature(
                self._pubmed,
                self._s2,
                tool_input["author"],
                tool_input["year"],
                tool_input.get("title_keywords", ""),
            )
        if tool_name == "get_abstract":
            assert self._pubmed is not None
            assert self._s2 is not None
            return await get_abstract(self._pubmed, self._s2, tool_input["paper_id"])
        return f"Unknown tool: {tool_name}"

    async def run(self, manuscript: Manuscript, config: Config):
        """Pre-verify the references, then run the base agent loop.

        Creates the PubMed, Semantic Scholar and Crossref clients, builds the
        multi-source verification report when the manuscript has reference
        entries, delegates to ``BaseCheckerAgent.run`` and finally closes all
        three clients.

        Args:
            manuscript: The manuscript under review.
            config: Run configuration; ``config.claim`` supplies the PubMed
                e-mail, API key and concurrency limit.

        Returns:
            Whatever ``BaseCheckerAgent.run`` returns for this run.
        """
        # Drop any report left over from a previous run on this instance;
        # otherwise a manuscript without references would be sent the report
        # generated for an earlier, unrelated manuscript.
        self._verification_report = ""

        self._pubmed = PubMedClient(
            email=config.claim.pubmed_email,
            api_key=config.claim.pubmed_api_key,
            max_concurrent=config.claim.max_concurrent_pubmed,
        )
        self._s2 = SemanticScholarClient(max_concurrent=3)
        crossref = CrossrefClient(max_concurrent=3, mailto=config.claim.pubmed_email)

        try:
            # Run multi-source verification as harness pre-pass. Use
            # reconstructed entries, not raw lines — verifying table captions
            # and wrapped fragments wastes 3 API calls each and pollutes the
            # report with bogus NOT_FOUND rows.
            ref_lines = reference_entries(manuscript.reference_section)
            if ref_lines:
                logger.info(
                    "Running multi-source verification for %d references...", len(ref_lines)
                )
                verified = await verify_references(ref_lines, self._pubmed, self._s2, crossref)
                self._verification_report = format_verification_report(verified)
                logger.info("Verification complete: %s", self._verification_report[:200])

            return await super().run(manuscript, config)
        finally:
            # Close all three clients even if an earlier close() raises, so a
            # failure in one does not leak the others. Close order is unchanged.
            try:
                await self._pubmed.close()
            finally:
                try:
                    await self._s2.close()
                finally:
                    await crossref.close()