visual-parser 1.0.0__tar.gz → 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {visual_parser-1.0.0 → visual_parser-1.0.1}/PKG-INFO +2 -2
- {visual_parser-1.0.0 → visual_parser-1.0.1}/README.md +1 -1
- {visual_parser-1.0.0 → visual_parser-1.0.1}/pyproject.toml +1 -1
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/__init__.py +2 -2
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser.egg-info/PKG-INFO +2 -2
- {visual_parser-1.0.0 → visual_parser-1.0.1}/setup.cfg +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/__main__.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/cli.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/cli_main.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/config.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/figure_describer.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/jsonl_writer.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/metadata_extractor.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/nougat_engine.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/pdf_tracker.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/pipeline.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/prompts.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/text_extractor.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/vision_llm.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser.egg-info/SOURCES.txt +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser.egg-info/dependency_links.txt +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser.egg-info/entry_points.txt +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser.egg-info/requires.txt +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: visual-parser
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: Standalone Visual-RAG PDF Parser — text extraction + Vision-LLM figure descriptions → JSONL
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://github.com/SmartLabNuclear/RADIANT_LLM
|
|
@@ -52,7 +52,7 @@ Requires-Dist: mypy; extra == "dev"
|
|
|
52
52
|
|
|
53
53
|
By default, the pipeline writes:
|
|
54
54
|
- `01_chunks_kb.jsonl`: chunked text extracted from PDFs (Nougat by default).
|
|
55
|
-
- `
|
|
55
|
+
- `02_visuals_kb.jsonl`: figure/page visual descriptions (Vision LLM).
|
|
56
56
|
- `03_metadata_kb.jsonl`: document metadata rows (title/author/etc.).
|
|
57
57
|
- `04_processed_pdfs.txt`: a tracker so re-runs only process new PDFs (unless `--rebuild`).
|
|
58
58
|
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
|
|
14
14
|
By default, the pipeline writes:
|
|
15
15
|
- `01_chunks_kb.jsonl`: chunked text extracted from PDFs (Nougat by default).
|
|
16
|
-
- `
|
|
16
|
+
- `02_visuals_kb.jsonl`: figure/page visual descriptions (Vision LLM).
|
|
17
17
|
- `03_metadata_kb.jsonl`: document metadata rows (title/author/etc.).
|
|
18
18
|
- `04_processed_pdfs.txt`: a tracker so re-runs only process new PDFs (unless `--rebuild`).
|
|
19
19
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "visual-parser"
|
|
7
|
-
version = "1.0.0"
|
|
7
|
+
version = "1.0.1"
|
|
8
8
|
description = "Standalone Visual-RAG PDF Parser — text extraction + Vision-LLM figure descriptions → JSONL"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -7,7 +7,7 @@ a Vision LLM (OpenAI GPT-4o or Google Gemini), and writes three JSONL knowledge
|
|
|
7
7
|
bases ready for any downstream RAG system:
|
|
8
8
|
|
|
9
9
|
01_chunks_kb.jsonl – text chunks with stable IDs
|
|
10
|
-
02_visuals_kb.jsonl – per-figure visual descriptions
|
|
10
|
+
02_visuals_kb.jsonl – per-figure visual descriptions
|
|
11
11
|
03_metadata_kb.jsonl – document-level metadata (title, authors, DOI …)
|
|
12
12
|
|
|
13
13
|
No chatbot, no vector store, no retrieval – just a robust parser.
|
|
@@ -17,4 +17,4 @@ from visual_parser.config import ParserConfig
|
|
|
17
17
|
from visual_parser.pipeline import run_pipeline
|
|
18
18
|
|
|
19
19
|
__all__ = ["ParserConfig", "run_pipeline"]
|
|
20
|
-
__version__ = "1.0.0"
|
|
20
|
+
__version__ = "1.0.1"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: visual-parser
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: Standalone Visual-RAG PDF Parser — text extraction + Vision-LLM figure descriptions → JSONL
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://github.com/SmartLabNuclear/RADIANT_LLM
|
|
@@ -52,7 +52,7 @@ Requires-Dist: mypy; extra == "dev"
|
|
|
52
52
|
|
|
53
53
|
By default, the pipeline writes:
|
|
54
54
|
- `01_chunks_kb.jsonl`: chunked text extracted from PDFs (Nougat by default).
|
|
55
|
-
- `
|
|
55
|
+
- `02_visuals_kb.jsonl`: figure/page visual descriptions (Vision LLM).
|
|
56
56
|
- `03_metadata_kb.jsonl`: document metadata rows (title/author/etc.).
|
|
57
57
|
- `04_processed_pdfs.txt`: a tracker so re-runs only process new PDFs (unless `--rebuild`).
|
|
58
58
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|