structflo-cser 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/CLAUDE.md +12 -3
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/PKG-INFO +1 -1
- structflo_cser-0.4.0/notebooks/01-quickstart.ipynb +1170 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/pyproject.toml +2 -1
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/scripts/finetune/lps/diag_e2e_decompose.py +16 -1
- structflo_cser-0.4.0/scripts/finetune/lps/diag_label_recall.py +195 -0
- structflo_cser-0.4.0/scripts/finetune/lps/diag_lps_scores.py +204 -0
- structflo_cser-0.4.0/scripts/finetune/relmatch/eval_compare.py +191 -0
- structflo_cser-0.4.0/scripts/finetune/relmatch/eval_compare_all.py +204 -0
- structflo_cser-0.4.0/scripts/finetune/relmatch/prepare_det_data.py +156 -0
- structflo_cser-0.4.0/scripts/finetune/relmatch/sweep_margin.py +152 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/scripts/publish_weights.py +42 -12
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/inference/detector.py +19 -4
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/pipeline/pipeline.py +18 -5
- structflo_cser-0.4.0/structflo/cser/relmatch/__init__.py +22 -0
- structflo_cser-0.4.0/structflo/cser/relmatch/dataset.py +234 -0
- structflo_cser-0.4.0/structflo/cser/relmatch/features.py +62 -0
- structflo_cser-0.4.0/structflo/cser/relmatch/matcher.py +124 -0
- structflo_cser-0.4.0/structflo/cser/relmatch/model.py +179 -0
- structflo_cser-0.4.0/structflo/cser/relmatch/train.py +213 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/weights.py +11 -0
- structflo_cser-0.3.0/notebooks/01-quickstart.ipynb +0 -959
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/.github/workflows/ci.yml +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/.github/workflows/publish.yml +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/.gitignore +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/.python-version +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/Makefile +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/README.md +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/annotate/__main__.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/annotate/pdf.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/annotate/server.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/annotate/storage.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/annotate/templates/index.html +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/asset_scripts/download_chembl.sh +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/config/data.yaml +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/config/pipeline.yaml +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/docs/fine-tune.md +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/docs/images/example-1.png +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/docs/images/example-2.png +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/docs/learned_matcher_plan.md +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/docs/lps.md +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/docs/publishing-weights.md +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/main.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/notebooks/02-LPS.ipynb +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/notebooks/03-PDF.ipynb +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/notebooks/notebook-data/bio-arcgive-1.png +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/notebooks/notebook-data/example-annotated.pdf +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/notebooks/notebook-data/example.pdf +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/notebooks/notebook-data/example.pptx +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/notebooks/notebook-data/screen-1.png +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/notebooks/notebook-data/syn-1.jpg +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/quick.md +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/scripts/finetune/lps/eval_compare.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/scripts/finetune/lps/eval_end2end.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/scripts/finetune/lps/eval_rejection.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/scripts/finetune/lps/mine_fp_negatives.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/scripts/finetune/lps/prepare_data.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/scripts/finetune/lps/train.sh +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/scripts/finetune/yolo/eval_compare.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/scripts/finetune/yolo/prepare_data.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/scripts/finetune/yolo/train.sh +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/__init__.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/_geometry.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/config.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/data/__init__.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/data/distractor_images.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/data/smiles.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/distractors/__init__.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/distractors/charts.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/distractors/shapes.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/distractors/text_elements.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/generation/__init__.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/generation/dataset.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/generation/page.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/generation/specialty.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/generation/tabular.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/inference/__init__.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/inference/nms.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/inference/pairing.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/inference/tiling.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/lps/__init__.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/lps/dataset.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/lps/evaluate.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/lps/features.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/lps/matcher.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/lps/scorer.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/lps/train.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/pipeline/__init__.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/pipeline/cli.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/pipeline/matcher.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/pipeline/models.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/pipeline/ocr.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/pipeline/smiles_extractor.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/rendering/__init__.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/rendering/chemistry.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/rendering/text.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/training/__init__.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/training/trainer.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/viz/__init__.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/viz/detections.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/structflo/cser/viz/labels.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/tests/__init__.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/tests/test_config.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/tests/test_generation.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/tests/test_geometry.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/tests/test_imports.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/tests/test_inference.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/tests/test_models.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/tests/test_viz.py +0 -0
- {structflo_cser-0.3.0 → structflo_cser-0.4.0}/uv.lock +0 -0
|
@@ -119,16 +119,25 @@ ChemPipeline.to_records(pairs)
|
|
|
119
119
|
- **Architecture**: YOLO11l (ultralytics)
|
|
120
120
|
- **Classes**: 2 — `chemical_structure` (0), `compound_label` (1)
|
|
121
121
|
- **Training image size**: 1280px
|
|
122
|
-
- **Inference**:
|
|
122
|
+
- **Inference**: full-image at imgsz=1280 (the training resolution) is the default and
|
|
123
|
+
strictly outperforms tiling on large landscape pages — verified on real_test: label
|
|
124
|
+
recall 53%→80%, structure recall 93%→99%, 5× fewer false positives, end-to-end pairing F1 0.41→0.82.
|
|
125
|
+
Sliding-window tiling (1536px tiles, 20% overlap, per-class NMS) remains available via
|
|
126
|
+
`tile=True` for very dense pages, but cuts labels at tile boundaries.
|
|
123
127
|
- **Training config**: AdamW, cosine LR, grayscale images, no colour augmentation
|
|
124
128
|
- **Runs directory**: `runs/labels_detect/`
|
|
125
129
|
- **YOLO data config**: `config/data.yaml`
|
|
126
130
|
|
|
127
131
|
## Matching strategies
|
|
128
132
|
|
|
129
|
-
1. **HungarianMatcher** — centroid Euclidean distance + `scipy.optimize.linear_sum_assignment
|
|
133
|
+
1. **HungarianMatcher** — centroid Euclidean distance + `scipy.optimize.linear_sum_assignment`.
|
|
134
|
+
Parameter-free; strong baseline on clean detections.
|
|
130
135
|
2. **LearnedMatcher** (LPS) — CNN scorer produces association probability per (struct, label) pair,
|
|
131
|
-
then Hungarian on `1 - score`.
|
|
136
|
+
then Hungarian on `1 - score`.
|
|
137
|
+
3. **RelationalMatcher** (`structflo/cser/relmatch/`) — geometry-only transformer over all page
|
|
138
|
+
detections + Sinkhorn optimal transport with learnable dustbins (SuperGlue-style). **Default in
|
|
139
|
+
ChemPipeline.** Best learned matcher in the benchmark (matches the distance-based baseline on assignment, best at
|
|
140
|
+
rejecting unlabelled structures). Weights: `cser-relmatcher` (HF Hub).
|
|
132
141
|
|
|
133
142
|
## Weights system
|
|
134
143
|
|