visual-parser 1.0.0__tar.gz → 1.0.1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {visual_parser-1.0.0 → visual_parser-1.0.1}/PKG-INFO +2 -2
  2. {visual_parser-1.0.0 → visual_parser-1.0.1}/README.md +1 -1
  3. {visual_parser-1.0.0 → visual_parser-1.0.1}/pyproject.toml +1 -1
  4. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/__init__.py +2 -2
  5. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser.egg-info/PKG-INFO +2 -2
  6. {visual_parser-1.0.0 → visual_parser-1.0.1}/setup.cfg +0 -0
  7. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/__main__.py +0 -0
  8. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/cli.py +0 -0
  9. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/cli_main.py +0 -0
  10. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/config.py +0 -0
  11. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/figure_describer.py +0 -0
  12. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/jsonl_writer.py +0 -0
  13. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/metadata_extractor.py +0 -0
  14. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/nougat_engine.py +0 -0
  15. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/pdf_tracker.py +0 -0
  16. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/pipeline.py +0 -0
  17. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/prompts.py +0 -0
  18. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/text_extractor.py +0 -0
  19. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser/vision_llm.py +0 -0
  20. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser.egg-info/SOURCES.txt +0 -0
  21. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser.egg-info/dependency_links.txt +0 -0
  22. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser.egg-info/entry_points.txt +0 -0
  23. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser.egg-info/requires.txt +0 -0
  24. {visual_parser-1.0.0 → visual_parser-1.0.1}/visual_parser.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: visual-parser
3
- Version: 1.0.0
3
+ Version: 1.0.1
4
4
  Summary: Standalone Visual-RAG PDF Parser — text extraction + Vision-LLM figure descriptions → JSONL
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://github.com/SmartLabNuclear/RADIANT_LLM
@@ -52,7 +52,7 @@ Requires-Dist: mypy; extra == "dev"
52
52
 
53
53
  By default, the pipeline writes:
54
54
  - `01_chunks_kb.jsonl`: chunked text extracted from PDFs (Nougat by default).
55
- - `02_figures_kb.jsonl`: figure/page visual descriptions (Vision LLM).
55
+ - `02_visuals_kb.jsonl`: figure/page visual descriptions (Vision LLM).
56
56
  - `03_metadata_kb.jsonl`: document metadata rows (title/author/etc.).
57
57
  - `04_processed_pdfs.txt`: a tracker so re-runs only process new PDFs (unless `--rebuild`).
58
58
 
@@ -13,7 +13,7 @@
13
13
 
14
14
  By default, the pipeline writes:
15
15
  - `01_chunks_kb.jsonl`: chunked text extracted from PDFs (Nougat by default).
16
- - `02_figures_kb.jsonl`: figure/page visual descriptions (Vision LLM).
16
+ - `02_visuals_kb.jsonl`: figure/page visual descriptions (Vision LLM).
17
17
  - `03_metadata_kb.jsonl`: document metadata rows (title/author/etc.).
18
18
  - `04_processed_pdfs.txt`: a tracker so re-runs only process new PDFs (unless `--rebuild`).
19
19
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "visual-parser"
7
- version = "1.0.0"
7
+ version = "1.0.1"
8
8
  description = "Standalone Visual-RAG PDF Parser — text extraction + Vision-LLM figure descriptions → JSONL"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -7,7 +7,7 @@ a Vision LLM (OpenAI GPT-4o or Google Gemini), and writes three JSONL knowledge
7
7
  bases ready for any downstream RAG system:
8
8
 
9
9
  01_chunks_kb.jsonl – text chunks with stable IDs
10
- 02_visuals_kb.jsonl – per-figure visual descriptions
10
+ 02_visuals_kb.jsonl – per-figure visual descriptions
11
11
  03_metadata_kb.jsonl – document-level metadata (title, authors, DOI …)
12
12
 
13
13
  No chatbot, no vector store, no retrieval – just a robust parser.
@@ -17,4 +17,4 @@ from visual_parser.config import ParserConfig
17
17
  from visual_parser.pipeline import run_pipeline
18
18
 
19
19
  __all__ = ["ParserConfig", "run_pipeline"]
20
- __version__ = "1.0.0"
20
+ __version__ = "1.0.1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: visual-parser
3
- Version: 1.0.0
3
+ Version: 1.0.1
4
4
  Summary: Standalone Visual-RAG PDF Parser — text extraction + Vision-LLM figure descriptions → JSONL
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://github.com/SmartLabNuclear/RADIANT_LLM
@@ -52,7 +52,7 @@ Requires-Dist: mypy; extra == "dev"
52
52
 
53
53
  By default, the pipeline writes:
54
54
  - `01_chunks_kb.jsonl`: chunked text extracted from PDFs (Nougat by default).
55
- - `02_figures_kb.jsonl`: figure/page visual descriptions (Vision LLM).
55
+ - `02_visuals_kb.jsonl`: figure/page visual descriptions (Vision LLM).
56
56
  - `03_metadata_kb.jsonl`: document metadata rows (title/author/etc.).
57
57
  - `04_processed_pdfs.txt`: a tracker so re-runs only process new PDFs (unless `--rebuild`).
58
58
 
File without changes