PyPI - sub-checker - Versions diffs - 0.1.0__tar.gz - Mend

sub-checker 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

sub_checker-0.1.0/LICENSE +21 -0
sub_checker-0.1.0/PKG-INFO +193 -0
sub_checker-0.1.0/README.md +152 -0
sub_checker-0.1.0/pyproject.toml +107 -0
sub_checker-0.1.0/setup.cfg +4 -0
sub_checker-0.1.0/src/sub_checker/__init__.py +3 -0
sub_checker-0.1.0/src/sub_checker/agents/__init__.py +0 -0
sub_checker-0.1.0/src/sub_checker/agents/base.py +448 -0
sub_checker-0.1.0/src/sub_checker/agents/citation_claim.py +142 -0
sub_checker-0.1.0/src/sub_checker/agents/citation_exist.py +100 -0
sub_checker-0.1.0/src/sub_checker/agents/citation_format.py +94 -0
sub_checker-0.1.0/src/sub_checker/agents/figure_table.py +62 -0
sub_checker-0.1.0/src/sub_checker/agents/journal_guidelines.py +110 -0
sub_checker-0.1.0/src/sub_checker/agents/logic.py +45 -0
sub_checker-0.1.0/src/sub_checker/agents/typo_grammar.py +58 -0
sub_checker-0.1.0/src/sub_checker/api.py +239 -0
sub_checker-0.1.0/src/sub_checker/cli.py +195 -0
sub_checker-0.1.0/src/sub_checker/config.py +113 -0
sub_checker-0.1.0/src/sub_checker/env.py +23 -0
sub_checker-0.1.0/src/sub_checker/eval_runner.py +319 -0
sub_checker-0.1.0/src/sub_checker/harness/__init__.py +0 -0
sub_checker-0.1.0/src/sub_checker/harness/dedup.py +86 -0
sub_checker-0.1.0/src/sub_checker/harness/deterministic.py +284 -0
sub_checker-0.1.0/src/sub_checker/harness/reviewer.py +409 -0
sub_checker-0.1.0/src/sub_checker/i18n.py +98 -0
sub_checker-0.1.0/src/sub_checker/logging_config.py +175 -0
sub_checker-0.1.0/src/sub_checker/models.py +98 -0
sub_checker-0.1.0/src/sub_checker/orchestrator.py +278 -0
sub_checker-0.1.0/src/sub_checker/parsers/__init__.py +0 -0
sub_checker-0.1.0/src/sub_checker/parsers/docx_parser.py +185 -0
sub_checker-0.1.0/src/sub_checker/pipeline.py +73 -0
sub_checker-0.1.0/src/sub_checker/reporters/__init__.py +0 -0
sub_checker-0.1.0/src/sub_checker/reporters/html_reporter.py +531 -0
sub_checker-0.1.0/src/sub_checker/reporters/json_reporter.py +55 -0
sub_checker-0.1.0/src/sub_checker/reporters/markdown_reporter.py +60 -0
sub_checker-0.1.0/src/sub_checker/reporters/terminal.py +71 -0
sub_checker-0.1.0/src/sub_checker/services/__init__.py +0 -0
sub_checker-0.1.0/src/sub_checker/services/citation_verifier.py +331 -0
sub_checker-0.1.0/src/sub_checker/services/crossref.py +106 -0
sub_checker-0.1.0/src/sub_checker/services/http_client.py +159 -0
sub_checker-0.1.0/src/sub_checker/services/pubmed.py +106 -0
sub_checker-0.1.0/src/sub_checker/services/semantic_scholar.py +87 -0
sub_checker-0.1.0/src/sub_checker/services/web.py +124 -0
sub_checker-0.1.0/src/sub_checker/tools/__init__.py +0 -0
sub_checker-0.1.0/src/sub_checker/tools/filesystem_tools.py +63 -0
sub_checker-0.1.0/src/sub_checker/tools/manuscript_tools.py +239 -0
sub_checker-0.1.0/src/sub_checker/tools/pubmed_tools.py +132 -0
sub_checker-0.1.0/src/sub_checker/tools/web_tools.py +59 -0
sub_checker-0.1.0/src/sub_checker.egg-info/PKG-INFO +193 -0
sub_checker-0.1.0/src/sub_checker.egg-info/SOURCES.txt +58 -0
sub_checker-0.1.0/src/sub_checker.egg-info/dependency_links.txt +1 -0
sub_checker-0.1.0/src/sub_checker.egg-info/entry_points.txt +3 -0
sub_checker-0.1.0/src/sub_checker.egg-info/requires.txt +20 -0
sub_checker-0.1.0/src/sub_checker.egg-info/top_level.txt +1 -0
sub_checker-0.1.0/tests/test_citation_parsing.py +47 -0
sub_checker-0.1.0/tests/test_docx_parser.py +148 -0
sub_checker-0.1.0/tests/test_eval_runner.py +100 -0
sub_checker-0.1.0/tests/test_harness.py +268 -0
sub_checker-0.1.0/tests/test_services.py +175 -0
sub_checker-0.1.0/tests/test_tools.py +90 -0

sub_checker-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 odafeng
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

sub_checker-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,193 @@
+Metadata-Version: 2.4
+Name: sub-checker
+Version: 0.1.0
+Summary: Pre-submission manuscript checker powered by Claude agents
+Author: odafeng
+License-Expression: MIT
+Project-URL: Homepage, https://github.com/odafeng/sub-checker
+Project-URL: Repository, https://github.com/odafeng/sub-checker
+Project-URL: Issues, https://github.com/odafeng/sub-checker/issues
+Keywords: academic,manuscript,submission,checker,agent,claude,proofreading,citation,journal
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Science/Research
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Scientific/Engineering
+Classifier: Topic :: Text Processing :: Linguistic
+Requires-Python: >=3.11
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: click>=8.0
+Requires-Dist: python-docx>=1.0
+Requires-Dist: anthropic>=0.106
+Requires-Dist: httpx>=0.27
+Requires-Dist: pydantic>=2.0
+Requires-Dist: pyyaml>=6.0
+Requires-Dist: rich>=13.0
+Provides-Extra: web
+Requires-Dist: fastapi>=0.115; extra == "web"
+Requires-Dist: uvicorn[standard]>=0.32; extra == "web"
+Requires-Dist: python-multipart>=0.0.12; extra == "web"
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0; extra == "dev"
+Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
+Requires-Dist: pytest-httpx>=0.30; extra == "dev"
+Requires-Dist: ruff>=0.8; extra == "dev"
+Requires-Dist: pyright>=1.1; extra == "dev"
+Requires-Dist: pre-commit>=4.0; extra == "dev"
+Dynamic: license-file
+# sub-checker
+[繁體中文](README.zh-TW.md) | English
+Pre-submission manuscript checker powered by Claude agents with a Plan-Execute-Verify harness. Each check is performed by a specialized AI agent, then validated by deterministic checks and a reviewer agent to eliminate false positives.
+## What it checks
+| Agent | What it does |
+|-------|-------------|
+| **typo_grammar** | Spelling, grammar, awkward phrasing (skips reference list) |
+| **figure_table** | Figure/table references exist, numbering is sequential, files present |
+| **citation_exist** | In-text citations match the reference list (deterministic pre-scan + agent) |
+| **citation_format** | Reference list follows target journal's citation style (APA, Vancouver, AMA, etc.) |
+| **journal_guidelines** | Word count, required sections, abstract format, required statements (COI, ethics, data availability) |
+| **logic** | Contradictions, unsupported claims, methods-results mismatches |
+| **citation_claim** | Multi-source verification (PubMed + Semantic Scholar + Crossref), then verifies claims against abstracts |
+## Install
+```bash
+pip install sub-checker
+```
+## Setup
+You need an Anthropic API key:
+```bash
+export ANTHROPIC_API_KEY=sk-ant-...
+```
+Or create a `.env` file in your working directory:
+```
+ANTHROPIC_API_KEY=sk-ant-...
+```
+## Usage
+### CLI
+```bash
+# Full check with target journal
+sub-check paper.docx -j "The Lancet"
+# Chinese report output
+sub-check paper.docx -j "Nature Medicine" --lang zh-TW
+# Only run specific checkers (cheaper & faster)
+sub-check paper.docx --only figure,citation
+# Skip expensive checkers
+sub-check paper.docx --skip claim,logic
+# Output as styled HTML report (includes COT viewer + confidence scores)
+sub-check paper.docx -o html --output-file report.html
+# Output as JSON (for programmatic use)
+sub-check paper.docx -o json --output-file report.json
+# Dry run (just parse, no agents)
+sub-check paper.docx --dry-run
+```
+### Web GUI
+```bash
+# Start backend (needs the web extra: pip install "sub-checker[web]")
+uvicorn sub_checker.api:app --reload
+# Start frontend (in another terminal)
+cd frontend && npm run dev
+```
+Open `http://localhost:5173` — upload a `.docx`, pick a journal, run, and view the report with confidence badges and filtered false positives.
+### CLI options
+```
+sub-check [OPTIONS] MANUSCRIPT_PATH
+Arguments:
+  MANUSCRIPT_PATH    Path to .docx file or directory containing one
+Options:
+  -j, --journal      Target journal name (e.g. "The Lancet")
+  -o, --output       terminal | json | markdown | html (default: terminal)
+  --output-file      Write report to file
+  --lang             Report language: en (default) or zh-TW
+  --only             Comma-separated: typo,logic,figure,citation,format,guidelines,claim
+  --skip             Comma-separated checkers to skip
+  -v, --verbose      Show agent tool calls in real-time
+  --dry-run          Only parse .docx, don't run agents
+  --init             Generate default .sub-checker.yaml
+```
+## Pipeline (5 phases)
+```
+Phase 1-3  │  7 checker agents (parallel within each phase)
+Phase 4    │  Deterministic post-validation (date math, citation cross-check)
+Phase 5    │  Reviewer agent validates all findings → confidence scores
+```
+- **Pre-execution**: deterministic citation pre-scan + multi-source reference verification
+- **Post-validation**: false positives filtered, remaining findings get confidence scores (0-100%)
+- See [harness-architecture.md](docs/harness-architecture.md) for full technical details
+## HTML report features
+- Dark-themed styled report with severity badges
+- **Confidence scores** — each finding shows reviewer-assigned confidence (%)
+- **False positive filtering** — deterministic checks and the reviewer agent remove incorrect findings
+- **Chain of Thought viewer** — expand to see every API call, tool use, and reasoning step
+- **Model display** — shows which Claude model generated the report
+- i18n support (English / Traditional Chinese)
+## Cost estimate
+Uses Claude Opus 4.8 by default. Approximate cost per manuscript (~4000 words):
+| Scope | Agents | Time | Cost |
+|-------|--------|------|------|
+| Quick check | `--only figure,citation` | ~4 min | ~$3 |
+| Standard | `--skip claim` | ~8 min | ~$7 |
+| Full check | all 7 agents + harness | ~12 min | ~$12–16 |
+You can change the model in `.sub-checker.yaml` (e.g. use `claude-sonnet-4-6` for cheaper runs).
+## Logging
+All logs are stored in `~/.sub-checker/`:
+- `logs/sub-checker.log` — application log (auto-rotated, 10MB x 5)
+- `logs/sub-checker.error.log` — errors only
+- `cot/` — agent chain-of-thought JSON logs (every tool call, every response)
+Set `cot_dir: "disabled"` in `.sub-checker.yaml` to turn off COT file logging (entries still appear in HTML reports).
+## Architecture
+- **5-phase pipeline**: Plan-Execute-Verify harness ([ADR-0010](docs/adr/0010-plan-execute-verify-harness.md))
+- 7 agents + reviewer agent, each with system prompt + curated tools + agentic loop ([ADR-0002](docs/adr/0002-agent-per-checker-architecture.md))
+- Parser provides raw data; agents judge document structure ([ADR-0009](docs/adr/0009-agent-over-deterministic-parsing.md))
+- Multi-source citation verification: PubMed + Semantic Scholar + Crossref ([ADR-0005](docs/adr/0005-semantic-scholar-fallback.md))
+- FastAPI + React + TypeScript GUI ([ADR-0006](docs/adr/0006-fastapi-react-gui.md))
+- [Benchmark comparison](docs/benchmark-comparison.md) | [Harness architecture](docs/harness-architecture.md)
+## License
+MIT

sub_checker-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,152 @@
+# sub-checker
+[繁體中文](README.zh-TW.md) | English
+Pre-submission manuscript checker powered by Claude agents with a Plan-Execute-Verify harness. Each check is performed by a specialized AI agent, then validated by deterministic checks and a reviewer agent to eliminate false positives.
+## What it checks
+| Agent | What it does |
+|-------|-------------|
+| **typo_grammar** | Spelling, grammar, awkward phrasing (skips reference list) |
+| **figure_table** | Figure/table references exist, numbering is sequential, files present |
+| **citation_exist** | In-text citations match the reference list (deterministic pre-scan + agent) |
+| **citation_format** | Reference list follows target journal's citation style (APA, Vancouver, AMA, etc.) |
+| **journal_guidelines** | Word count, required sections, abstract format, required statements (COI, ethics, data availability) |
+| **logic** | Contradictions, unsupported claims, methods-results mismatches |
+| **citation_claim** | Multi-source verification (PubMed + Semantic Scholar + Crossref), then verifies claims against abstracts |
+## Install
+```bash
+pip install sub-checker
+```
+## Setup
+You need an Anthropic API key:
+```bash
+export ANTHROPIC_API_KEY=sk-ant-...
+```
+Or create a `.env` file in your working directory:
+```
+ANTHROPIC_API_KEY=sk-ant-...
+```
+## Usage
+### CLI
+```bash
+# Full check with target journal
+sub-check paper.docx -j "The Lancet"
+# Chinese report output
+sub-check paper.docx -j "Nature Medicine" --lang zh-TW
+# Only run specific checkers (cheaper & faster)
+sub-check paper.docx --only figure,citation
+# Skip expensive checkers
+sub-check paper.docx --skip claim,logic
+# Output as styled HTML report (includes COT viewer + confidence scores)
+sub-check paper.docx -o html --output-file report.html
+# Output as JSON (for programmatic use)
+sub-check paper.docx -o json --output-file report.json
+# Dry run (just parse, no agents)
+sub-check paper.docx --dry-run
+```
+### Web GUI
+```bash
+# Start backend (needs the web extra: pip install "sub-checker[web]")
+uvicorn sub_checker.api:app --reload
+# Start frontend (in another terminal)
+cd frontend && npm run dev
+```
+Open `http://localhost:5173` — upload a `.docx`, pick a journal, run, and view the report with confidence badges and filtered false positives.
+### CLI options
+```
+sub-check [OPTIONS] MANUSCRIPT_PATH
+Arguments:
+  MANUSCRIPT_PATH    Path to .docx file or directory containing one
+Options:
+  -j, --journal      Target journal name (e.g. "The Lancet")
+  -o, --output       terminal | json | markdown | html (default: terminal)
+  --output-file      Write report to file
+  --lang             Report language: en (default) or zh-TW
+  --only             Comma-separated: typo,logic,figure,citation,format,guidelines,claim
+  --skip             Comma-separated checkers to skip
+  -v, --verbose      Show agent tool calls in real-time
+  --dry-run          Only parse .docx, don't run agents
+  --init             Generate default .sub-checker.yaml
+```
+## Pipeline (5 phases)
+```
+Phase 1-3  │  7 checker agents (parallel within each phase)
+Phase 4    │  Deterministic post-validation (date math, citation cross-check)
+Phase 5    │  Reviewer agent validates all findings → confidence scores
+```
+- **Pre-execution**: deterministic citation pre-scan + multi-source reference verification
+- **Post-validation**: false positives filtered, remaining findings get confidence scores (0-100%)
+- See [harness-architecture.md](docs/harness-architecture.md) for full technical details
+## HTML report features
+- Dark-themed styled report with severity badges
+- **Confidence scores** — each finding shows reviewer-assigned confidence (%)
+- **False positive filtering** — deterministic checks and the reviewer agent remove incorrect findings
+- **Chain of Thought viewer** — expand to see every API call, tool use, and reasoning step
+- **Model display** — shows which Claude model generated the report
+- i18n support (English / Traditional Chinese)
+## Cost estimate
+Uses Claude Opus 4.8 by default. Approximate cost per manuscript (~4000 words):
+| Scope | Agents | Time | Cost |
+|-------|--------|------|------|
+| Quick check | `--only figure,citation` | ~4 min | ~$3 |
+| Standard | `--skip claim` | ~8 min | ~$7 |
+| Full check | all 7 agents + harness | ~12 min | ~$12–16 |
+You can change the model in `.sub-checker.yaml` (e.g. use `claude-sonnet-4-6` for cheaper runs).
+## Logging
+All logs are stored in `~/.sub-checker/`:
+- `logs/sub-checker.log` — application log (auto-rotated, 10MB x 5)
+- `logs/sub-checker.error.log` — errors only
+- `cot/` — agent chain-of-thought JSON logs (every tool call, every response)
+Set `cot_dir: "disabled"` in `.sub-checker.yaml` to turn off COT file logging (entries still appear in HTML reports).
+## Architecture
+- **5-phase pipeline**: Plan-Execute-Verify harness ([ADR-0010](docs/adr/0010-plan-execute-verify-harness.md))
+- 7 agents + reviewer agent, each with system prompt + curated tools + agentic loop ([ADR-0002](docs/adr/0002-agent-per-checker-architecture.md))
+- Parser provides raw data; agents judge document structure ([ADR-0009](docs/adr/0009-agent-over-deterministic-parsing.md))
+- Multi-source citation verification: PubMed + Semantic Scholar + Crossref ([ADR-0005](docs/adr/0005-semantic-scholar-fallback.md))
+- FastAPI + React + TypeScript GUI ([ADR-0006](docs/adr/0006-fastapi-react-gui.md))
+- [Benchmark comparison](docs/benchmark-comparison.md) | [Harness architecture](docs/harness-architecture.md)
+## License
+MIT

sub_checker-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,107 @@
+[build-system]
+requires = ["setuptools>=68.0"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "sub-checker"
+version = "0.1.0"
+description = "Pre-submission manuscript checker powered by Claude agents"
+readme = "README.md"
+license = "MIT"
+requires-python = ">=3.11"
+authors = [
+    { name = "odafeng" },
+]
+keywords = [
+    "academic",
+    "manuscript",
+    "submission",
+    "checker",
+    "agent",
+    "claude",
+    "proofreading",
+    "citation",
+    "journal",
+]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Science/Research",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Scientific/Engineering",
+    "Topic :: Text Processing :: Linguistic",
+]
+dependencies = [
+    "click>=8.0",
+    "python-docx>=1.0",
+    "anthropic>=0.106",
+    "httpx>=0.27",
+    "pydantic>=2.0",
+    "pyyaml>=6.0",
+    "rich>=13.0",
+]
+[project.optional-dependencies]
+web = [
+    "fastapi>=0.115",
+    "uvicorn[standard]>=0.32",
+    "python-multipart>=0.0.12",
+]
+dev = [
+    "pytest>=8.0",
+    "pytest-asyncio>=0.23",
+    "pytest-httpx>=0.30",
+    "ruff>=0.8",
+    "pyright>=1.1",
+    "pre-commit>=4.0",
+]
+[project.urls]
+Homepage = "https://github.com/odafeng/sub-checker"
+Repository = "https://github.com/odafeng/sub-checker"
+Issues = "https://github.com/odafeng/sub-checker/issues"
+[project.scripts]
+sub-check = "sub_checker.cli:main"
+sub-check-eval = "sub_checker.eval_runner:main"
+# ── Ruff ──────────────────────────────────────────────
+[tool.ruff]
+target-version = "py311"
+line-length = 100
+src = ["src", "tests"]
+[tool.ruff.lint]
+select = [
+    "E",    # pycodestyle errors
+    "W",    # pycodestyle warnings
+    "F",    # pyflakes
+    "I",    # isort
+    "N",    # pep8-naming
+    "UP",   # pyupgrade
+    "B",    # flake8-bugbear
+    "SIM",  # flake8-simplify
+    "RUF",  # ruff-specific
+]
+ignore = [
+    "E501",   # line too long (handled by formatter)
+]
+[tool.ruff.lint.isort]
+known-first-party = ["sub_checker"]
+# ── Pyright ───────────────────────────────────────────
+[tool.pyright]
+pythonVersion = "3.11"
+pythonPlatform = "All"
+typeCheckingMode = "basic"
+venvPath = "."
+venv = ".venv"
+reportMissingImports = false
+# ── Pytest ────────────────────────────────────────────
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+testpaths = ["tests"]

sub_checker-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

sub_checker-0.1.0/src/sub_checker/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""Sub-Checker: Pre-submission manuscript checker powered by Claude agents."""
+__version__ = "0.1.0"

sub_checker-0.1.0/src/sub_checker/agents/__init__.py ADDED Viewed

File without changes