visual-parser 1.0.0__tar.gz → 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {visual_parser-1.0.0 → visual_parser-1.0.2}/PKG-INFO +4 -2
- {visual_parser-1.0.0 → visual_parser-1.0.2}/README.md +1 -1
- {visual_parser-1.0.0 → visual_parser-1.0.2}/pyproject.toml +3 -1
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser/__init__.py +2 -2
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser/cli.py +28 -28
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser/cli_main.py +1 -1
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser.egg-info/PKG-INFO +4 -2
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser.egg-info/requires.txt +2 -0
- {visual_parser-1.0.0 → visual_parser-1.0.2}/setup.cfg +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser/__main__.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser/config.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser/figure_describer.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser/jsonl_writer.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser/metadata_extractor.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser/nougat_engine.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser/pdf_tracker.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser/pipeline.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser/prompts.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser/text_extractor.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser/vision_llm.py +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser.egg-info/SOURCES.txt +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser.egg-info/dependency_links.txt +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser.egg-info/entry_points.txt +0 -0
- {visual_parser-1.0.0 → visual_parser-1.0.2}/visual_parser.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: visual-parser
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: Standalone Visual-RAG PDF Parser — text extraction + Vision-LLM figure descriptions → JSONL
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://github.com/SmartLabNuclear/RADIANT_LLM
|
|
@@ -30,6 +30,8 @@ Requires-Dist: openai==1.78.1
|
|
|
30
30
|
Requires-Dist: google-generativeai==0.8.5
|
|
31
31
|
Requires-Dist: python-dotenv==1.1.0
|
|
32
32
|
Requires-Dist: tqdm==4.67.1
|
|
33
|
+
Requires-Dist: nltk>=3.8
|
|
34
|
+
Requires-Dist: python-Levenshtein>=0.20
|
|
33
35
|
Provides-Extra: ocr
|
|
34
36
|
Requires-Dist: pytesseract==0.3.13; extra == "ocr"
|
|
35
37
|
Provides-Extra: dev
|
|
@@ -52,7 +54,7 @@ Requires-Dist: mypy; extra == "dev"
|
|
|
52
54
|
|
|
53
55
|
By default, the pipeline writes:
|
|
54
56
|
- `01_chunks_kb.jsonl`: chunked text extracted from PDFs (Nougat by default).
|
|
55
|
-
- `
|
|
57
|
+
- `02_visuals_kb.jsonl`: figure/page visual descriptions (Vision LLM).
|
|
56
58
|
- `03_metadata_kb.jsonl`: document metadata rows (title/author/etc.).
|
|
57
59
|
- `04_processed_pdfs.txt`: a tracker so re-runs only process new PDFs (unless `--rebuild`).
|
|
58
60
|
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
|
|
14
14
|
By default, the pipeline writes:
|
|
15
15
|
- `01_chunks_kb.jsonl`: chunked text extracted from PDFs (Nougat by default).
|
|
16
|
-
- `
|
|
16
|
+
- `02_visuals_kb.jsonl`: figure/page visual descriptions (Vision LLM).
|
|
17
17
|
- `03_metadata_kb.jsonl`: document metadata rows (title/author/etc.).
|
|
18
18
|
- `04_processed_pdfs.txt`: a tracker so re-runs only process new PDFs (unless `--rebuild`).
|
|
19
19
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "visual-parser"
|
|
7
|
-
version = "1.0.0"
|
|
7
|
+
version = "1.0.2"
|
|
8
8
|
description = "Standalone Visual-RAG PDF Parser — text extraction + Vision-LLM figure descriptions → JSONL"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -40,6 +40,8 @@ dependencies = [
|
|
|
40
40
|
"google-generativeai==0.8.5",
|
|
41
41
|
"python-dotenv==1.1.0",
|
|
42
42
|
"tqdm==4.67.1",
|
|
43
|
+
"nltk>=3.8",
|
|
44
|
+
"python-Levenshtein>=0.20",
|
|
43
45
|
]
|
|
44
46
|
|
|
45
47
|
[project.optional-dependencies]
|
|
@@ -7,7 +7,7 @@ a Vision LLM (OpenAI GPT-4o or Google Gemini), and writes three JSONL knowledge
|
|
|
7
7
|
bases ready for any downstream RAG system:
|
|
8
8
|
|
|
9
9
|
01_chunks_kb.jsonl – text chunks with stable IDs
|
|
10
|
-
02_visuals_kb.jsonl – per-figure visual descriptions
|
|
10
|
+
02_visuals_kb.jsonl – per-figure visual descriptions
|
|
11
11
|
03_metadata_kb.jsonl – document-level metadata (title, authors, DOI …)
|
|
12
12
|
|
|
13
13
|
No chatbot, no vector store, no retrieval – just a robust parser.
|
|
@@ -17,4 +17,4 @@ from visual_parser.config import ParserConfig
|
|
|
17
17
|
from visual_parser.pipeline import run_pipeline
|
|
18
18
|
|
|
19
19
|
__all__ = ["ParserConfig", "run_pipeline"]
|
|
20
|
-
__version__ = "1.0.0"
|
|
20
|
+
__version__ = "1.0.2"
|
|
@@ -17,8 +17,8 @@ import sys
|
|
|
17
17
|
USAGE_EXAMPLES = """
|
|
18
18
|
Examples
|
|
19
19
|
--------
|
|
20
|
-
# Nougat (default) + GPT-5.
|
|
21
|
-
python visual-parser.py --input-dir ./my_pdfs
|
|
20
|
+
# Nougat (default) + GPT-5.4 vision
|
|
21
|
+
python visual-parser.py --input-dir ./my_pdfs
|
|
22
22
|
|
|
23
23
|
# Fast lightweight extraction + Gemini
|
|
24
24
|
python visual-parser.py --input-dir ./my_pdfs \\
|
|
@@ -47,7 +47,7 @@ def _build_arg_parser() -> argparse.ArgumentParser:
|
|
|
47
47
|
"Visual-RAG PDF Parser — detects new PDFs, extracts text and "
|
|
48
48
|
"figure descriptions, and writes three JSONL knowledge bases:\n"
|
|
49
49
|
" 01_chunks_kb.jsonl text chunks\n"
|
|
50
|
-
" 02_visuals_kb.jsonl visual descriptions\n"
|
|
50
|
+
" 02_visuals_kb.jsonl visual descriptions\n"
|
|
51
51
|
" 03_metadata_kb.jsonl document metadata"
|
|
52
52
|
),
|
|
53
53
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
@@ -108,20 +108,20 @@ def _build_arg_parser() -> argparse.ArgumentParser:
|
|
|
108
108
|
choices=["gpt", "gemini"],
|
|
109
109
|
default="gpt",
|
|
110
110
|
help=(
|
|
111
|
-
"gpt — OpenAI GPT-5.
|
|
112
|
-
"gemini — Google Gemini (set GEMINI_API_KEY in .env)."
|
|
113
|
-
),
|
|
114
|
-
)
|
|
111
|
+
"gpt — OpenAI GPT-5.4 (set OPENAI_API_KEY in .env).\n"
|
|
112
|
+
"gemini — Google Gemini (set GEMINI_API_KEY in .env)."
|
|
113
|
+
),
|
|
114
|
+
)
|
|
115
115
|
vision_group.add_argument(
|
|
116
116
|
"--vision-model",
|
|
117
117
|
default=None,
|
|
118
118
|
metavar="MODEL_NAME",
|
|
119
|
-
help=(
|
|
120
|
-
"Vision model name. Omit to use the latest for each provider:\n"
|
|
121
|
-
" gpt → gpt-5.
|
|
122
|
-
" gemini → gemini-3-pro-preview (also: gemini-2.5-flash, gemini-1.5-pro)"
|
|
123
|
-
),
|
|
124
|
-
)
|
|
119
|
+
help=(
|
|
120
|
+
"Vision model name. Omit to use the latest for each provider:\n"
|
|
121
|
+
" gpt → gpt-5.4 (also: gpt-5.5, gpt-5.3-chat-latest, gpt-5.2, gpt-5.1, gpt-5, gpt-4o, gpt-4.1)\n"
|
|
122
|
+
" gemini → gemini-3-pro-preview (also: gemini-2.5-flash, gemini-1.5-pro)"
|
|
123
|
+
),
|
|
124
|
+
)
|
|
125
125
|
vision_group.add_argument(
|
|
126
126
|
"--vision-detail",
|
|
127
127
|
choices=["low", "high", "auto"],
|
|
@@ -134,17 +134,17 @@ def _build_arg_parser() -> argparse.ArgumentParser:
|
|
|
134
134
|
)
|
|
135
135
|
vision_group.add_argument(
|
|
136
136
|
"--reasoning-effort",
|
|
137
|
-
choices=["minimal", "none", "low", "medium", "high", "xhigh"],
|
|
137
|
+
choices=["minimal", "none", "low", "medium", "high", "xhigh"],
|
|
138
138
|
default="medium",
|
|
139
|
-
help=(
|
|
140
|
-
"Reasoning effort for GPT-5.x models (ignored for Gemini and older GPT).\n"
|
|
141
|
-
" minimal/none — minimum reasoning, depending on model.\n"
|
|
142
|
-
" low — light reasoning.\n"
|
|
143
|
-
" medium — balanced (default).\n"
|
|
144
|
-
" high — deeper reasoning, slower.\n"
|
|
145
|
-
" xhigh — maximum depth (gpt-5.2, gpt-5.4, and gpt-5.5)."
|
|
146
|
-
),
|
|
147
|
-
)
|
|
139
|
+
help=(
|
|
140
|
+
"Reasoning effort for GPT-5.x models (ignored for Gemini and older GPT).\n"
|
|
141
|
+
" minimal/none — minimum reasoning, depending on model.\n"
|
|
142
|
+
" low — light reasoning.\n"
|
|
143
|
+
" medium — balanced (default).\n"
|
|
144
|
+
" high — deeper reasoning, slower.\n"
|
|
145
|
+
" xhigh — maximum depth (gpt-5.2, gpt-5.4, and gpt-5.5)."
|
|
146
|
+
),
|
|
147
|
+
)
|
|
148
148
|
vision_group.add_argument(
|
|
149
149
|
"--metadata-pages",
|
|
150
150
|
type=int,
|
|
@@ -194,10 +194,10 @@ def main(argv=None) -> int:
|
|
|
194
194
|
args = parser.parse_args(argv)
|
|
195
195
|
|
|
196
196
|
# Default vision model per provider when not explicitly set
|
|
197
|
-
if args.vision_model is None:
|
|
198
|
-
args.vision_model = (
|
|
199
|
-
"gpt-5.
|
|
200
|
-
)
|
|
197
|
+
if args.vision_model is None:
|
|
198
|
+
args.vision_model = (
|
|
199
|
+
"gpt-5.4" if args.vision_provider == "gpt" else "gemini-3-pro-preview"
|
|
200
|
+
)
|
|
201
201
|
|
|
202
202
|
from visual_parser.config import ParserConfig
|
|
203
203
|
|
|
@@ -209,7 +209,7 @@ def main(argv=None) -> int:
|
|
|
209
209
|
chunk_size = args.chunk_size,
|
|
210
210
|
chunk_overlap = args.chunk_overlap,
|
|
211
211
|
vision_provider = args.vision_provider,
|
|
212
|
-
gpt_vision_model = args.vision_model if args.vision_provider == "gpt" else "gpt-5.
|
|
212
|
+
gpt_vision_model = args.vision_model if args.vision_provider == "gpt" else "gpt-5.4",
|
|
213
213
|
gemini_vision_model = args.vision_model if args.vision_provider == "gemini" else "gemini-3-pro-preview",
|
|
214
214
|
gpt_reasoning_effort = args.reasoning_effort,
|
|
215
215
|
vision_detail = args.vision_detail,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: visual-parser
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: Standalone Visual-RAG PDF Parser — text extraction + Vision-LLM figure descriptions → JSONL
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://github.com/SmartLabNuclear/RADIANT_LLM
|
|
@@ -30,6 +30,8 @@ Requires-Dist: openai==1.78.1
|
|
|
30
30
|
Requires-Dist: google-generativeai==0.8.5
|
|
31
31
|
Requires-Dist: python-dotenv==1.1.0
|
|
32
32
|
Requires-Dist: tqdm==4.67.1
|
|
33
|
+
Requires-Dist: nltk>=3.8
|
|
34
|
+
Requires-Dist: python-Levenshtein>=0.20
|
|
33
35
|
Provides-Extra: ocr
|
|
34
36
|
Requires-Dist: pytesseract==0.3.13; extra == "ocr"
|
|
35
37
|
Provides-Extra: dev
|
|
@@ -52,7 +54,7 @@ Requires-Dist: mypy; extra == "dev"
|
|
|
52
54
|
|
|
53
55
|
By default, the pipeline writes:
|
|
54
56
|
- `01_chunks_kb.jsonl`: chunked text extracted from PDFs (Nougat by default).
|
|
55
|
-
- `
|
|
57
|
+
- `02_visuals_kb.jsonl`: figure/page visual descriptions (Vision LLM).
|
|
56
58
|
- `03_metadata_kb.jsonl`: document metadata rows (title/author/etc.).
|
|
57
59
|
- `04_processed_pdfs.txt`: a tracker so re-runs only process new PDFs (unless `--rebuild`).
|
|
58
60
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|