versiref-bible 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- versiref_bible-0.1.0/LICENSE +21 -0
- versiref_bible-0.1.0/PKG-INFO +121 -0
- versiref_bible-0.1.0/README.md +93 -0
- versiref_bible-0.1.0/pyproject.toml +63 -0
- versiref_bible-0.1.0/src/versiref/bible/__init__.py +16 -0
- versiref_bible-0.1.0/src/versiref/bible/builder.py +115 -0
- versiref_bible-0.1.0/src/versiref/bible/cli.py +243 -0
- versiref_bible-0.1.0/src/versiref/bible/database.py +230 -0
- versiref_bible-0.1.0/src/versiref/bible/models.py +48 -0
- versiref_bible-0.1.0/src/versiref/bible/py.typed +0 -0
- versiref_bible-0.1.0/src/versiref/bible/reader.py +123 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Michael F. Polis
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: versiref-bible
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Access Bibles using versiref
|
|
5
|
+
Author: Fr. John Lawrence M. Polis
|
|
6
|
+
Author-email: Fr. John Lawrence M. Polis <emptier-sank-dose@duck.com>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: Religion
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
20
|
+
Classifier: Topic :: Religion
|
|
21
|
+
Classifier: Topic :: Text Processing
|
|
22
|
+
Requires-Dist: versiref>=0.5.1
|
|
23
|
+
Requires-Dist: click>=8.1.0
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Project-URL: Homepage, https://github.com/fiapps/versiref-bible
|
|
26
|
+
Project-URL: Issues, https://github.com/fiapps/versiref-bible/issues
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# VersiRef Bible
|
|
30
|
+
|
|
31
|
+
[VersiRef](https://github.com/fiapps/versiref) is a Python package for sophisticated parsing,
|
|
32
|
+
manipulation, and printing of references to the Bible.
|
|
33
|
+
|
|
34
|
+
`versiref-bible` provides access to Bibles in an SQLite-based format: ranges of verses can be
|
|
35
|
+
retrieved by reference, and verse text can be searched with SQLite FTS5. VersiRef parses the
|
|
36
|
+
references and handles versification.
|
|
37
|
+
|
|
38
|
+
The command-line interface is designed for use by an LLM: output is compact plain text, one
|
|
39
|
+
verse per line as `reference⇥text` (TAB-separated).
|
|
40
|
+
|
|
41
|
+
## Documentation
|
|
42
|
+
|
|
43
|
+
- [Building Databases](docs/building.md) — the `build` command (producers).
|
|
44
|
+
- [Querying Databases](docs/querying.md) — the `show`, `search`, and `info` commands (consumers).
|
|
45
|
+
|
|
46
|
+
## Installation
|
|
47
|
+
|
|
48
|
+
```sh
|
|
49
|
+
uv sync
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
This installs the `versiref-bible` command into the project environment (run it with
|
|
53
|
+
`uv run versiref-bible …`).
|
|
54
|
+
|
|
55
|
+
## Commands
|
|
56
|
+
|
|
57
|
+
### `build` — create a database from a text file
|
|
58
|
+
|
|
59
|
+
Reads a CCAT-format `.cat` file where each line is `Abbrev C:V text` (the abbreviation is a
|
|
60
|
+
BibleWorks-style book name) and writes an SQLite database. Each verse is stored as one row,
|
|
61
|
+
keyed by an integer verse key computed under the chosen versification, plus an FTS5 index over
|
|
62
|
+
the verse text. CCAT footnotes/formatting are kept as plain text for now.
|
|
63
|
+
|
|
64
|
+
```sh
|
|
65
|
+
uv run versiref-bible build BIBLE.cat --versification eng --title "My Bible"
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Options:
|
|
69
|
+
|
|
70
|
+
- `-v, --versification` (required) — named versification of the Bible (`eng`, `lxx`, `cei`, …).
|
|
71
|
+
- `-o, --output` — database path (default: input name with a `.db` suffix).
|
|
72
|
+
- `--title` — human-readable title stored in the database.
|
|
73
|
+
- `--book-style` — reference style whose names map the file's abbreviations
|
|
74
|
+
(default `en-bibleworks`).
|
|
75
|
+
- `--encoding` — input text encoding (default `utf-8`; the CEI sample needs `cp1252`).
|
|
76
|
+
|
|
77
|
+
Lines whose book abbreviation is unrecognized (e.g. the Sirach prologue `Sip`), or whose book
|
|
78
|
+
is absent from the chosen versification, are skipped with a warning on stderr.
|
|
79
|
+
|
|
80
|
+
### `show` — print the verses of a reference
|
|
81
|
+
|
|
82
|
+
```sh
|
|
83
|
+
uv run versiref-bible show BIBLE.db "John 3:16-18"
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
- `--style` — reference style for parsing the input and labelling output (default `en-sbl`).
|
|
87
|
+
- `--from-versification` — interpret the reference in this versification and map it to the
|
|
88
|
+
database's versification.
|
|
89
|
+
|
|
90
|
+
The reference is parsed in the style you choose, so use that style's conventions
|
|
91
|
+
(e.g. `--style it-cei` expects `Gen 1,1-3`, not `Gen 1:1-3`).
|
|
92
|
+
|
|
93
|
+
### `search` — full-text search verse text
|
|
94
|
+
|
|
95
|
+
```sh
|
|
96
|
+
uv run versiref-bible search BIBLE.db "living water" --limit 10
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
`QUERY` uses SQLite FTS5 syntax (e.g. `light`, `"living water"`, `love AND world`). Results
|
|
100
|
+
are in canonical verse order by default.
|
|
101
|
+
|
|
102
|
+
- `-n, --limit` — maximum verses to return (default 20).
|
|
103
|
+
- `--in` — restrict the search to a reference (e.g. `--in "Gen 1"`, `--in "John"`).
|
|
104
|
+
- `--order` — `canonical` (default, verse order) or `relevance` (bm25 ranking).
|
|
105
|
+
- `--style` — reference style for labelling output and parsing `--in` (default `en-sbl`).
|
|
106
|
+
|
|
107
|
+
### `info` — show database metadata
|
|
108
|
+
|
|
109
|
+
```sh
|
|
110
|
+
uv run versiref-bible info BIBLE.db
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Prints the stored metadata (title, versification, source, build time, …) and the verse count.
|
|
114
|
+
|
|
115
|
+
## Development
|
|
116
|
+
|
|
117
|
+
```sh
|
|
118
|
+
uv run pytest # tests
|
|
119
|
+
uv run ruff check # lint
|
|
120
|
+
uv run mypy src # type-check
|
|
121
|
+
```
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# VersiRef Bible
|
|
2
|
+
|
|
3
|
+
[VersiRef](https://github.com/fiapps/versiref) is a Python package for sophisticated parsing,
|
|
4
|
+
manipulation, and printing of references to the Bible.
|
|
5
|
+
|
|
6
|
+
`versiref-bible` provides access to Bibles in an SQLite-based format: ranges of verses can be
|
|
7
|
+
retrieved by reference, and verse text can be searched with SQLite FTS5. VersiRef parses the
|
|
8
|
+
references and handles versification.
|
|
9
|
+
|
|
10
|
+
The command-line interface is designed for use by an LLM: output is compact plain text, one
|
|
11
|
+
verse per line as `reference⇥text` (TAB-separated).
|
|
12
|
+
|
|
13
|
+
## Documentation
|
|
14
|
+
|
|
15
|
+
- [Building Databases](docs/building.md) — the `build` command (producers).
|
|
16
|
+
- [Querying Databases](docs/querying.md) — the `show`, `search`, and `info` commands (consumers).
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```sh
|
|
21
|
+
uv sync
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
This installs the `versiref-bible` command into the project environment (run it with
|
|
25
|
+
`uv run versiref-bible …`).
|
|
26
|
+
|
|
27
|
+
## Commands
|
|
28
|
+
|
|
29
|
+
### `build` — create a database from a text file
|
|
30
|
+
|
|
31
|
+
Reads a CCAT-format `.cat` file where each line is `Abbrev C:V text` (the abbreviation is a
|
|
32
|
+
BibleWorks-style book name) and writes an SQLite database. Each verse is stored as one row,
|
|
33
|
+
keyed by an integer verse key computed under the chosen versification, plus an FTS5 index over
|
|
34
|
+
the verse text. CCAT footnotes/formatting are kept as plain text for now.
|
|
35
|
+
|
|
36
|
+
```sh
|
|
37
|
+
uv run versiref-bible build BIBLE.cat --versification eng --title "My Bible"
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Options:
|
|
41
|
+
|
|
42
|
+
- `-v, --versification` (required) — named versification of the Bible (`eng`, `lxx`, `cei`, …).
|
|
43
|
+
- `-o, --output` — database path (default: input name with a `.db` suffix).
|
|
44
|
+
- `--title` — human-readable title stored in the database.
|
|
45
|
+
- `--book-style` — reference style whose names map the file's abbreviations
|
|
46
|
+
(default `en-bibleworks`).
|
|
47
|
+
- `--encoding` — input text encoding (default `utf-8`; the CEI sample needs `cp1252`).
|
|
48
|
+
|
|
49
|
+
Lines whose book abbreviation is unrecognized (e.g. the Sirach prologue `Sip`), or whose book
|
|
50
|
+
is absent from the chosen versification, are skipped with a warning on stderr.
|
|
51
|
+
|
|
52
|
+
### `show` — print the verses of a reference
|
|
53
|
+
|
|
54
|
+
```sh
|
|
55
|
+
uv run versiref-bible show BIBLE.db "John 3:16-18"
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
- `--style` — reference style for parsing the input and labelling output (default `en-sbl`).
|
|
59
|
+
- `--from-versification` — interpret the reference in this versification and map it to the
|
|
60
|
+
database's versification.
|
|
61
|
+
|
|
62
|
+
The reference is parsed in the style you choose, so use that style's conventions
|
|
63
|
+
(e.g. `--style it-cei` expects `Gen 1,1-3`, not `Gen 1:1-3`).
|
|
64
|
+
|
|
65
|
+
### `search` — full-text search verse text
|
|
66
|
+
|
|
67
|
+
```sh
|
|
68
|
+
uv run versiref-bible search BIBLE.db "living water" --limit 10
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
`QUERY` uses SQLite FTS5 syntax (e.g. `light`, `"living water"`, `love AND world`). Results
|
|
72
|
+
are in canonical verse order by default.
|
|
73
|
+
|
|
74
|
+
- `-n, --limit` — maximum verses to return (default 20).
|
|
75
|
+
- `--in` — restrict the search to a reference (e.g. `--in "Gen 1"`, `--in "John"`).
|
|
76
|
+
- `--order` — `canonical` (default, verse order) or `relevance` (bm25 ranking).
|
|
77
|
+
- `--style` — reference style for labelling output and parsing `--in` (default `en-sbl`).
|
|
78
|
+
|
|
79
|
+
### `info` — show database metadata
|
|
80
|
+
|
|
81
|
+
```sh
|
|
82
|
+
uv run versiref-bible info BIBLE.db
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Prints the stored metadata (title, versification, source, build time, …) and the verse count.
|
|
86
|
+
|
|
87
|
+
## Development
|
|
88
|
+
|
|
89
|
+
```sh
|
|
90
|
+
uv run pytest # tests
|
|
91
|
+
uv run ruff check # lint
|
|
92
|
+
uv run mypy src # type-check
|
|
93
|
+
```
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "versiref-bible"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Access Bibles using versiref"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "Fr. John Lawrence M. Polis", email = "emptier-sank-dose@duck.com" }
|
|
8
|
+
]
|
|
9
|
+
classifiers = [
|
|
10
|
+
"Development Status :: 3 - Alpha",
|
|
11
|
+
"Intended Audience :: Developers",
|
|
12
|
+
"Intended Audience :: Religion",
|
|
13
|
+
"Operating System :: OS Independent",
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
16
|
+
"Programming Language :: Python :: 3.10",
|
|
17
|
+
"Programming Language :: Python :: 3.11",
|
|
18
|
+
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"Programming Language :: Python :: 3.13",
|
|
20
|
+
"Programming Language :: Python :: 3.14",
|
|
21
|
+
"Topic :: Religion",
|
|
22
|
+
"Topic :: Text Processing",
|
|
23
|
+
]
|
|
24
|
+
requires-python = ">=3.10"
|
|
25
|
+
license = "MIT"
|
|
26
|
+
license-files = ["LICEN[CS]E*"]
|
|
27
|
+
dependencies = [
|
|
28
|
+
"versiref>=0.5.1",
|
|
29
|
+
"click>=8.1.0",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.scripts]
|
|
33
|
+
versiref-bible = "versiref.bible.cli:main"
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Homepage = "https://github.com/fiapps/versiref-bible"
|
|
37
|
+
Issues = "https://github.com/fiapps/versiref-bible/issues"
|
|
38
|
+
|
|
39
|
+
[build-system]
|
|
40
|
+
requires = ["uv_build>=0.9.28,<0.10.0"]
|
|
41
|
+
build-backend = "uv_build"
|
|
42
|
+
|
|
43
|
+
[tool.uv.build-backend]
|
|
44
|
+
module-name = "versiref.bible"
|
|
45
|
+
|
|
46
|
+
[tool.mypy]
|
|
47
|
+
mypy_path = "src"
|
|
48
|
+
namespace_packages = true
|
|
49
|
+
explicit_package_bases = true
|
|
50
|
+
|
|
51
|
+
[tool.ruff.lint]
|
|
52
|
+
select = ["D"]
|
|
53
|
+
ignore = ["D203", "D213"]
|
|
54
|
+
|
|
55
|
+
[tool.ruff.lint.per-file-ignores]
|
|
56
|
+
"tests/**" = ["D101", "D102", "D103"]
|
|
57
|
+
|
|
58
|
+
[dependency-groups]
|
|
59
|
+
dev = [
|
|
60
|
+
"mypy>=1.19.1",
|
|
61
|
+
"pytest>=9.0.2",
|
|
62
|
+
"ruff>=0.15.1",
|
|
63
|
+
]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Access Bibles stored in an SQLite database with versiref."""
|
|
2
|
+
|
|
3
|
+
from versiref.bible.builder import build_database
|
|
4
|
+
from versiref.bible.database import Database
|
|
5
|
+
from versiref.bible.models import BuildStats, Verse
|
|
6
|
+
from versiref.bible.reader import format_verse, search_verses, show_verses
|
|
7
|
+
|
|
8
|
+
# Public API of the package: every name imported above is re-exported here.
__all__ = [
    "BuildStats",
    "Database",
    "Verse",
    "build_database",
    "format_verse",
    "search_verses",
    "show_verses",
]
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Build a SQLite Bible database from a CCAT-format text file."""
|
|
2
|
+
|
|
3
|
+
import datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from versiref import RefStyle, SimpleBibleRef, Versification
|
|
7
|
+
|
|
8
|
+
from .database import SCHEMA_VERSION, Database
|
|
9
|
+
from .models import BuildStats, Verse
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _parse_line(line: str) -> tuple[str, int, int, str] | None:
|
|
13
|
+
"""Parse one CCAT line ``Abbrev C:V text`` into its components.
|
|
14
|
+
|
|
15
|
+
Returns ``(abbrev, chapter, verse, text)`` or ``None`` if the line does not
|
|
16
|
+
match the expected shape. CCAT footnotes/formatting in ``text`` are kept
|
|
17
|
+
verbatim (parsing them is future work).
|
|
18
|
+
"""
|
|
19
|
+
abbrev, _, rest = line.partition(" ")
|
|
20
|
+
cv, _, text = rest.partition(" ")
|
|
21
|
+
if not abbrev or ":" not in cv:
|
|
22
|
+
return None
|
|
23
|
+
chapter_s, _, verse_s = cv.partition(":")
|
|
24
|
+
try:
|
|
25
|
+
return abbrev, int(chapter_s), int(verse_s), text
|
|
26
|
+
except ValueError:
|
|
27
|
+
return None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def build_database(
    input_path: str | Path,
    output_path: str | Path,
    *,
    versification: str,
    title: str | None = None,
    book_style: str = "en-bibleworks",
    encoding: str = "utf-8",
) -> BuildStats:
    """Build a Bible database from a CCAT-format ``.cat`` file.

    Each non-blank line is read as ``Abbrev C:V text``. The abbreviation is
    mapped to a Paratext book ID via the ``book_style`` recognized names, and a
    verse key is computed under ``versification``. Lines whose abbreviation is
    unrecognized (e.g. the Sirach prologue ``Sip``) or whose book is absent from
    the versification are warned-and-skipped — see :class:`BuildStats`.

    The whole input is parsed before the output is touched; an existing
    output file is then removed and a fresh database is written.

    Args:
        input_path: Source ``.cat`` file.
        output_path: Destination database (overwritten if it exists).
        versification: Named versification for the Bible (e.g. ``eng``).
        title: Human-readable title stored in metadata.
        book_style: Named reference style whose recognized names map the
            file's book abbreviations (default ``en-bibleworks``).
        encoding: Text encoding of the input file (default ``utf-8``;
            the CEI sample needs ``cp1252``).

    Returns:
        A :class:`BuildStats` summary of what was stored and skipped.

    Raises:
        ValueError: If ``output_path`` is the same file as ``input_path``.
            Because the output is deleted before it is rewritten, building
            in place would destroy the source text.

    """
    input_path = Path(input_path)
    output_path = Path(output_path)

    # Guard against in-place builds (e.g. ``-o`` pointing at the input, or an
    # input already named ``*.db`` combined with the CLI's default output).
    if output_path.exists() and output_path.samefile(input_path):
        raise ValueError(
            f"output path {output_path} is the same file as the input; "
            "choose a different output path"
        )

    vers = Versification.named(versification)
    recognized = RefStyle.named(book_style).recognized_names

    stats = BuildStats()
    # Keyed by verse key so a repeated verse overwrites the earlier one.
    verses: dict[int, Verse] = {}

    with input_path.open(encoding=encoding) as handle:
        for raw in handle:
            line = raw.rstrip("\n")
            if not line.strip():
                continue
            parsed = _parse_line(line)
            if parsed is None:
                stats.malformed += 1
                continue
            abbrev, chapter, verse, text = parsed

            book_id = recognized.get(abbrev)
            if book_id is None:
                stats.unknown_books[abbrev] = stats.unknown_books.get(abbrev, 0) + 1
                continue

            # range_keys yields nothing when the book is not in this
            # versification; that is the off-scheme, warn-and-skip case.
            ranges = list(SimpleBibleRef.for_range(book_id, chapter, verse).range_keys(vers))
            if not ranges:
                stats.off_scheme_books[abbrev] = (
                    stats.off_scheme_books.get(abbrev, 0) + 1
                )
                continue

            # First element of the first range; presumably the start key of
            # the single-verse range -- TODO confirm against versiref.
            key = ranges[0][0]
            if key in verses:
                stats.duplicates += 1
            verses[key] = Verse(key, book_id, chapter, verse, text)

    stats.stored = len(verses)

    # Start from an empty file rather than merging into an old database.
    output_path.unlink(missing_ok=True)
    with Database(output_path) as db:
        db.create_schema()
        db.insert_verses(verses.values())
        db.rebuild_fts()
        db.set_metadata("schema_version", SCHEMA_VERSION)
        db.set_metadata("versification", versification)
        db.set_metadata("source", input_path.name)
        db.set_metadata("verse_count", str(stats.stored))
        # Naive local time (no UTC offset), to the second.
        db.set_metadata("built_at", datetime.datetime.now().isoformat(timespec="seconds"))
        if title is not None:
            db.set_metadata("title", title)

    return stats
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"""Command-line interface for versiref-bible."""
|
|
2
|
+
|
|
3
|
+
import sqlite3
import sys
from pathlib import Path

import click
from versiref import RefStyle

from .builder import build_database
from .database import Database
from .models import BuildStats
from .reader import format_verse, search_verses, show_verses
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Root command group: the ``build``, ``show``, ``search`` and ``info``
# subcommands attach themselves to it via ``@main.command()``.
@click.group()
@click.version_option(package_name="versiref-bible")
def main() -> None:
    """Access Bibles stored in an SQLite database with versiref."""
    # The group does no work itself; click dispatches to the chosen subcommand.
    pass
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _report_build(stats: BuildStats, output: Path) -> None:
    """Summarise a finished build on stdout and list skip warnings on stderr.

    Args:
        stats: Counters collected while the input file was read.
        output: Path of the database that was written.

    """
    click.echo(f"✓ Built {output} ({stats.stored} verses)")

    # Gather the warnings first, then emit them in the order they were added.
    notices: list[str] = []

    if stats.unknown_books:
        skipped = sum(stats.unknown_books.values())
        labels = ", ".join(sorted(stats.unknown_books))
        notices.append(
            f" warning: skipped {skipped} line(s) with unrecognized book "
            f"abbreviations: {labels}"
        )

    if stats.off_scheme_books:
        skipped = sum(stats.off_scheme_books.values())
        labels = ", ".join(sorted(stats.off_scheme_books))
        notices.append(
            f" warning: skipped {skipped} line(s) in books not in the "
            f"versification: {labels}"
        )

    if stats.malformed:
        notices.append(f" warning: skipped {stats.malformed} malformed line(s)")

    if stats.duplicates:
        notices.append(
            f" warning: {stats.duplicates} duplicate verse key(s) (last kept)"
        )

    for notice in notices:
        click.echo(notice, err=True)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@main.command()
@click.argument(
    "input_file", type=click.Path(exists=True, dir_okay=False, path_type=Path)
)
@click.option(
    "-o",
    "--output",
    "output_file",
    type=click.Path(dir_okay=False, path_type=Path),
    default=None,
    help="Output database path [default: INPUT with a .db suffix]",
)
@click.option(
    "-v",
    "--versification",
    required=True,
    help="Named versification of the Bible (e.g. eng, lxx, cei).",
)
@click.option("--title", default=None, help="Human-readable title for the Bible.")
@click.option(
    "--book-style",
    default="en-bibleworks",
    show_default=True,
    help="Reference style whose names map the file's book abbreviations.",
)
@click.option(
    "--encoding",
    default="utf-8",
    show_default=True,
    help="Text encoding of the input file (the CEI sample needs cp1252).",
)
def build(
    input_file: Path,
    output_file: Path | None,
    versification: str,
    title: str | None,
    book_style: str,
    encoding: str,
) -> None:
    """Build a Bible database from a CCAT-format text file.

    Each line of INPUT_FILE is read as ``Abbrev C:V text``. Lines whose book
    abbreviation is unrecognized, or whose book is not in the chosen
    versification, are skipped with a warning.
    """
    # Without -o the database is written next to the input, suffix -> ".db".
    output = output_file or input_file.with_suffix(".db")
    try:
        stats = build_database(
            input_file,
            output,
            versification=versification,
            title=title,
            book_style=book_style,
            encoding=encoding,
        )
        _report_build(stats, output)
    # sqlite3.Error is not an OSError: e.g. an output path inside a missing
    # directory fails in sqlite3.connect and would otherwise print a traceback.
    except (ValueError, LookupError, OSError, sqlite3.Error) as exc:
        click.echo(f"Error: {exc}", err=True)
        sys.exit(1)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@main.command()
@click.argument(
    "database", type=click.Path(exists=True, dir_okay=False, path_type=Path)
)
@click.argument("reference")
@click.option(
    "--style",
    default="en-sbl",
    show_default=True,
    help="Reference style for parsing input and labelling output.",
)
@click.option(
    "--from-versification",
    default=None,
    help="Interpret REFERENCE in this versification and map it to the database's.",
)
def show(
    database: Path,
    reference: str,
    style: str,
    from_versification: str | None,
) -> None:
    """Print the verses covered by a Bible REFERENCE, one per line.

    Each line is ``reference<TAB>text``.
    """
    try:
        verses, db_vers = show_verses(
            database,
            reference,
            style_name=style,
            from_versification=from_versification,
        )
        if not verses:
            # Diagnostics go to stderr so stdout carries verse lines only.
            click.echo("No verses found.", err=True)
            return
        ref_style = RefStyle.named(style)
        for verse in verses:
            click.echo(format_verse(verse, ref_style, db_vers))
    # SQLite failures (e.g. DATABASE is not an SQLite file) are sqlite3.Error,
    # which is neither ValueError nor LookupError; report them the same way.
    except (ValueError, LookupError, sqlite3.Error) as exc:
        click.echo(f"Error: {exc}", err=True)
        sys.exit(1)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@main.command()
@click.argument(
    "database", type=click.Path(exists=True, dir_okay=False, path_type=Path)
)
@click.argument("query")
@click.option(
    "-n",
    "--limit",
    type=int,
    default=20,
    show_default=True,
    help="Maximum number of verses to return.",
)
@click.option(
    "--in",
    "scope",
    default=None,
    help='Restrict the search to a reference (e.g. "Gen 1", "John").',
)
@click.option(
    "--order",
    type=click.Choice(["canonical", "relevance"]),
    default="canonical",
    show_default=True,
    help="Result order: canonical (verse order) or relevance (bm25 rank).",
)
@click.option(
    "--style",
    default="en-sbl",
    show_default=True,
    help="Reference style for labelling output and parsing --in.",
)
def search(
    database: Path,
    query: str,
    limit: int,
    scope: str | None,
    order: str,
    style: str,
) -> None:
    """Full-text search verse text with FTS5 QUERY.

    QUERY uses SQLite FTS5 syntax (e.g. ``light``, ``"living water"``,
    ``love AND world``). Output is ``reference<TAB>text``, in canonical verse
    order by default (use ``--order relevance`` for bm25 ranking).
    """
    try:
        verses, total, db_vers = search_verses(
            database, query, limit=limit, scope=scope, order=order, style_name=style
        )
        if not verses:
            click.echo("No matching verses.", err=True)
            return
        ref_style = RefStyle.named(style)
        for verse in verses:
            click.echo(format_verse(verse, ref_style, db_vers))
        if total > len(verses):
            # Truncation notice on stderr keeps stdout to verse lines only.
            click.echo(
                f"… showing {len(verses)} of {total} matches "
                f"(raise --limit to see more)",
                err=True,
            )
    # QUERY is user-supplied FTS5 syntax; SQLite reports a malformed query or
    # an unusable database file as sqlite3.Error, so handle that like the
    # other user-facing failures instead of letting a traceback escape.
    except (ValueError, LookupError, sqlite3.Error) as exc:
        click.echo(f"Error: {exc}", err=True)
        sys.exit(1)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
@main.command()
@click.argument(
    "database", type=click.Path(exists=True, dir_okay=False, path_type=Path)
)
def info(database: Path) -> None:
    """Show metadata and verse count for a Bible database."""
    try:
        with Database(database) as db:
            metadata = db.get_all_metadata()
            count = db.count_verses()
        # Printing happens after the connection has been closed.
        for key, value in metadata.items():
            click.echo(f"{key}: {value}")
        click.echo(f"verses: {count}")
    # A file that is not an SQLite database (or lacks the expected tables)
    # raises sqlite3.Error, which is neither ValueError nor OSError.
    except (ValueError, OSError, sqlite3.Error) as exc:
        click.echo(f"Error: {exc}", err=True)
        sys.exit(1)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
# Run the click group when this module is executed as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""Database schema and operations for versiref-bible."""
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
from collections.abc import Iterable, Sequence
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from .models import Verse
|
|
9
|
+
|
|
10
|
+
# Schema revision string; the builder records it in the metadata table under
# the "schema_version" key.
SCHEMA_VERSION = "1.0"

# DDL script run by Database.create_schema(). Every statement uses
# IF NOT EXISTS, so running it against an existing database changes nothing.
SCHEMA_SQL = """
-- Key-value metadata (title, versification, source, etc.)
CREATE TABLE IF NOT EXISTS metadata (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);

-- One row per Bible verse. verse_key is the BBCCCVVV integer computed under
-- the database's versification; it doubles as the FTS5 rowid.
CREATE TABLE IF NOT EXISTS verses (
verse_key INTEGER PRIMARY KEY,
book_id TEXT NOT NULL,
chapter INTEGER NOT NULL,
verse INTEGER NOT NULL,
text TEXT NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_verses_bcv ON verses(book_id, chapter, verse);

-- FTS5 full-text index over verse text (external content = verses table)
CREATE VIRTUAL TABLE IF NOT EXISTS verses_fts USING fts5(
text,
content='verses',
content_rowid='verse_key'
);
"""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _range_clause(ranges: Sequence[tuple[int, int]]) -> tuple[str, list[Any]]:
|
|
41
|
+
"""Build an ``OR`` of ``verse_key BETWEEN`` clauses for a set of key ranges.
|
|
42
|
+
|
|
43
|
+
Returns the parenthesised SQL fragment and its bound parameters. An empty
|
|
44
|
+
``ranges`` yields ``("1=0", [])`` so the caller matches nothing.
|
|
45
|
+
"""
|
|
46
|
+
if not ranges:
|
|
47
|
+
return "1=0", []
|
|
48
|
+
parts = ["verse_key BETWEEN ? AND ?"] * len(ranges)
|
|
49
|
+
params: list[Any] = []
|
|
50
|
+
for start, end in ranges:
|
|
51
|
+
params.extend((start, end))
|
|
52
|
+
return "(" + " OR ".join(parts) + ")", params
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class Database:
|
|
56
|
+
"""Manages SQLite database connections and operations for a Bible."""
|
|
57
|
+
|
|
58
|
+
def __init__(self, db_path: str | Path) -> None:
    """Initialize the database wrapper.

    No connection is opened here; call :meth:`connect` or use the instance
    as a context manager.

    Args:
        db_path: Path to the SQLite database file.

    """
    self.db_path = Path(db_path)
    # None until connect() opens a connection.
    self.conn: sqlite3.Connection | None = None
|
|
67
|
+
|
|
68
|
+
def __enter__(self) -> "Database":
    """Open the connection on context entry."""
    self.connect()
    # Return the wrapper itself so ``with Database(path) as db`` works.
    return self
|
|
72
|
+
|
|
73
|
+
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
    """Close the connection on context exit."""
    # Only closes; returning None lets any in-flight exception propagate.
    self.close()
|
|
76
|
+
|
|
77
|
+
def connect(self) -> None:
|
|
78
|
+
"""Open the database connection."""
|
|
79
|
+
self.conn = sqlite3.connect(self.db_path)
|
|
80
|
+
self.conn.row_factory = sqlite3.Row
|
|
81
|
+
|
|
82
|
+
def close(self) -> None:
|
|
83
|
+
"""Close the database connection."""
|
|
84
|
+
if self.conn:
|
|
85
|
+
self.conn.close()
|
|
86
|
+
self.conn = None
|
|
87
|
+
|
|
88
|
+
def _require_conn(self) -> sqlite3.Connection:
    """Return the open connection.

    Raises:
        RuntimeError: If no connection is currently open.

    """
    if self.conn is None:
        raise RuntimeError("Database not connected")
    return self.conn
|
|
92
|
+
|
|
93
|
+
def create_schema(self) -> None:
    """Create the database schema if it does not exist."""
    conn = self._require_conn()
    # SCHEMA_SQL holds several statements, hence executescript.
    conn.executescript(SCHEMA_SQL)
    conn.commit()
|
|
98
|
+
|
|
99
|
+
def set_metadata(self, key: str, value: str) -> None:
|
|
100
|
+
"""Set a single metadata key-value pair."""
|
|
101
|
+
conn = self._require_conn()
|
|
102
|
+
conn.execute(
|
|
103
|
+
"INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)", (key, value)
|
|
104
|
+
)
|
|
105
|
+
conn.commit()
|
|
106
|
+
|
|
107
|
+
def get_metadata(self, key: str) -> str | None:
|
|
108
|
+
"""Return a metadata value by key, or None if absent."""
|
|
109
|
+
conn = self._require_conn()
|
|
110
|
+
cursor = conn.execute("SELECT value FROM metadata WHERE key = ?", (key,))
|
|
111
|
+
row = cursor.fetchone()
|
|
112
|
+
return row["value"] if row else None
|
|
113
|
+
|
|
114
|
+
def get_all_metadata(self) -> dict[str, str]:
|
|
115
|
+
"""Return all metadata as a dictionary."""
|
|
116
|
+
conn = self._require_conn()
|
|
117
|
+
cursor = conn.execute("SELECT key, value FROM metadata ORDER BY key")
|
|
118
|
+
return {row["key"]: row["value"] for row in cursor.fetchall()}
|
|
119
|
+
|
|
120
|
+
def insert_verses(self, verses: Iterable[Verse]) -> None:
|
|
121
|
+
"""Bulk-insert verses in a single transaction.
|
|
122
|
+
|
|
123
|
+
Does not rebuild the FTS index; call :meth:`rebuild_fts` afterwards.
|
|
124
|
+
"""
|
|
125
|
+
conn = self._require_conn()
|
|
126
|
+
conn.executemany(
|
|
127
|
+
"INSERT OR REPLACE INTO verses (verse_key, book_id, chapter, verse, text)"
|
|
128
|
+
" VALUES (?, ?, ?, ?, ?)",
|
|
129
|
+
[(v.key, v.book_id, v.chapter, v.verse, v.text) for v in verses],
|
|
130
|
+
)
|
|
131
|
+
conn.commit()
|
|
132
|
+
|
|
133
|
+
def rebuild_fts(self) -> None:
|
|
134
|
+
"""Rebuild the FTS5 index from the external-content verses table."""
|
|
135
|
+
conn = self._require_conn()
|
|
136
|
+
conn.execute("INSERT INTO verses_fts(verses_fts) VALUES('rebuild')")
|
|
137
|
+
conn.commit()
|
|
138
|
+
|
|
139
|
+
def count_verses(self) -> int:
|
|
140
|
+
"""Return the total number of stored verses."""
|
|
141
|
+
conn = self._require_conn()
|
|
142
|
+
cursor = conn.execute("SELECT COUNT(*) AS n FROM verses")
|
|
143
|
+
return int(cursor.fetchone()["n"])
|
|
144
|
+
|
|
145
|
+
def verses_in_ranges(self, ranges: Sequence[tuple[int, int]]) -> list[Verse]:
|
|
146
|
+
"""Return verses whose key falls in any of the given key ranges.
|
|
147
|
+
|
|
148
|
+
Args:
|
|
149
|
+
ranges: ``(start_key, end_key)`` pairs (inclusive).
|
|
150
|
+
|
|
151
|
+
Returns:
|
|
152
|
+
Matching verses ordered by verse key (i.e. canonical order).
|
|
153
|
+
|
|
154
|
+
"""
|
|
155
|
+
conn = self._require_conn()
|
|
156
|
+
clause, params = _range_clause(ranges)
|
|
157
|
+
cursor = conn.execute(
|
|
158
|
+
"SELECT verse_key, book_id, chapter, verse, text FROM verses"
|
|
159
|
+
f" WHERE {clause} ORDER BY verse_key",
|
|
160
|
+
params,
|
|
161
|
+
)
|
|
162
|
+
return [_row_to_verse(row) for row in cursor.fetchall()]
|
|
163
|
+
|
|
164
|
+
def search(
|
|
165
|
+
self,
|
|
166
|
+
query: str,
|
|
167
|
+
limit: int,
|
|
168
|
+
ranges: Sequence[tuple[int, int]] | None = None,
|
|
169
|
+
order: str = "canonical",
|
|
170
|
+
) -> list[Verse]:
|
|
171
|
+
"""Full-text search verse text.
|
|
172
|
+
|
|
173
|
+
Args:
|
|
174
|
+
query: FTS5 MATCH query.
|
|
175
|
+
limit: Maximum number of verses to return.
|
|
176
|
+
ranges: Optional key ranges to restrict the search to.
|
|
177
|
+
order: ``"canonical"`` for verse-key (Bible) order, or
|
|
178
|
+
``"relevance"`` for FTS5 bm25 rank (best first), tie-broken by
|
|
179
|
+
verse key.
|
|
180
|
+
|
|
181
|
+
Returns:
|
|
182
|
+
Matching verses in the requested order, capped at ``limit``.
|
|
183
|
+
|
|
184
|
+
"""
|
|
185
|
+
if order not in ("canonical", "relevance"):
|
|
186
|
+
raise ValueError(f"Invalid order {order!r}: use 'canonical' or 'relevance'.")
|
|
187
|
+
conn = self._require_conn()
|
|
188
|
+
sql = [
|
|
189
|
+
"SELECT v.verse_key, v.book_id, v.chapter, v.verse, v.text",
|
|
190
|
+
"FROM verses_fts f JOIN verses v ON v.verse_key = f.rowid",
|
|
191
|
+
"WHERE f.text MATCH ?",
|
|
192
|
+
]
|
|
193
|
+
params: list[Any] = [query]
|
|
194
|
+
if ranges is not None:
|
|
195
|
+
clause, range_params = _range_clause(ranges)
|
|
196
|
+
sql.append(f"AND {clause}")
|
|
197
|
+
params.extend(range_params)
|
|
198
|
+
if order == "relevance":
|
|
199
|
+
sql.append("ORDER BY f.rank, v.verse_key")
|
|
200
|
+
else:
|
|
201
|
+
sql.append("ORDER BY v.verse_key")
|
|
202
|
+
sql.append("LIMIT ?")
|
|
203
|
+
params.append(limit)
|
|
204
|
+
cursor = conn.execute("\n".join(sql), params)
|
|
205
|
+
return [_row_to_verse(row) for row in cursor.fetchall()]
|
|
206
|
+
|
|
207
|
+
def count_matches(
|
|
208
|
+
self, query: str, ranges: Sequence[tuple[int, int]] | None = None
|
|
209
|
+
) -> int:
|
|
210
|
+
"""Count verses matching an FTS5 query (ignoring any limit)."""
|
|
211
|
+
conn = self._require_conn()
|
|
212
|
+
sql = ["SELECT COUNT(*) AS n FROM verses_fts f WHERE f.text MATCH ?"]
|
|
213
|
+
params: list[Any] = [query]
|
|
214
|
+
if ranges is not None:
|
|
215
|
+
clause, range_params = _range_clause(ranges)
|
|
216
|
+
# f.rowid is the verse_key; reuse the same clause column name.
|
|
217
|
+
sql.append("AND " + clause.replace("verse_key", "f.rowid"))
|
|
218
|
+
params.extend(range_params)
|
|
219
|
+
cursor = conn.execute("\n".join(sql), params)
|
|
220
|
+
return int(cursor.fetchone()["n"])
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _row_to_verse(row: sqlite3.Row) -> Verse:
    """Build a ``Verse`` from a result row.

    The row must expose the columns ``verse_key``, ``book_id``, ``chapter``,
    ``verse`` and ``text`` by name; ``verse_key`` becomes ``Verse.key`` and the
    remaining columns map onto the attributes of the same name.
    """
    same_named = ("book_id", "chapter", "verse", "text")
    return Verse(key=row["verse_key"], **{column: row[column] for column in same_named})
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Data models for versiref-bible."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass(frozen=True)
class Verse:
    """One Bible verse as stored in the database (immutable).

    Attributes:
        key: Integer verse key (BBCCCVVV) produced by ``range_keys`` under
            the database's versification. The same value serves as the FTS5
            rowid.
        book_id: Paratext book ID, e.g. ``JHN``.
        chapter: Chapter number within the book.
        verse: Verse number within the chapter.
        text: The text of the verse.

    """

    key: int
    book_id: str
    chapter: int
    verse: int
    text: str
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class BuildStats:
    """Counters collected during a ``build`` run, for reporting to the operator.

    Attributes:
        stored: How many verses were written to the database.
        unknown_books: Maps an abbreviation to the number of times it appeared
            when the book style did not recognize it (e.g. the Sirach
            prologue ``Sip``).
        off_scheme_books: Maps an abbreviation to the number of times it
            appeared for a book that the chosen versification lacks, so no
            integer key can be computed.
        malformed: How many non-blank lines failed to parse as
            ``Abbrev C:V text``.
        duplicates: How many lines had a verse key that collided with an
            earlier line (the later line wins).

    """

    stored: int = 0
    unknown_books: dict[str, int] = field(default_factory=dict)
    off_scheme_books: dict[str, int] = field(default_factory=dict)
    malformed: int = 0
    duplicates: int = 0
|
|
File without changes
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""Query a Bible database: show verses by reference and full-text search."""
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from versiref import RefParser, RefStyle, SimpleBibleRef, Versification
|
|
7
|
+
|
|
8
|
+
from .database import Database
|
|
9
|
+
from .models import Verse
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _db_versification(db: Database) -> Versification:
    """Look up the versification recorded in a database's metadata.

    Args:
        db: A connected database to read the ``versification`` key from.

    Returns:
        The versification named by that metadata value.

    Raises:
        ValueError: If the database has no ``versification`` metadata.

    """
    recorded = db.get_metadata("versification")
    if recorded is not None:
        return Versification.named(recorded)
    raise ValueError("Database has no 'versification' metadata; rebuild it.")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def format_verse(verse: Verse, style: RefStyle, versification: Versification) -> str:
    """Render a verse as one ``reference<TAB>text`` line of plain text.

    The reference part is the verse's book, chapter and verse formatted with
    ``style`` under ``versification``. Verse text is single-line in CCAT, so
    the tab separator keeps each output line unambiguous.
    """
    single_verse_ref = SimpleBibleRef.for_range(verse.book_id, verse.chapter, verse.verse)
    label = single_verse_ref.format(style, versification)
    return f"{label}\t{verse.text}"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def show_verses(
    db_path: str | Path,
    reference: str,
    *,
    style_name: str = "en-sbl",
    from_versification: str | None = None,
) -> tuple[list[Verse], Versification]:
    """Fetch the verses that a Bible reference covers.

    Args:
        db_path: Path to the Bible database.
        reference: A Bible reference string (parsed by versiref).
        style_name: Reference style used to parse ``reference`` and to label
            output (default ``en-sbl``).
        from_versification: When given, ``reference`` is read in this
            versification and then mapped to the database's versification;
            otherwise it is read directly in the database's versification.

    Returns:
        ``(verses, db_versification)`` — verses in canonical order plus the
        database's versification (for labelling).

    Raises:
        ValueError: If the database has no versification metadata, the
            reference cannot be parsed, or it cannot be mapped to the
            database's versification.

    """
    with Database(db_path) as db:
        db_vers = _db_versification(db)
        needs_mapping = from_versification is not None
        if needs_mapping:
            source_vers = Versification.named(from_versification)
        else:
            source_vers = db_vers

        ref_style = RefStyle.named(style_name)
        ref = RefParser(ref_style, source_vers).parse(reference, silent=True)
        if ref is None:
            raise ValueError(f"Could not parse reference: {reference!r}")

        if needs_mapping:
            # Translate from the caller's versification into the database's.
            ref = ref.map_to(db_vers)
            if ref is None:
                raise ValueError(
                    f"Could not map {reference!r} from {from_versification} to "
                    f"the database's versification."
                )

        key_ranges = list(ref.range_keys())
        return db.verses_in_ranges(key_ranges), db_vers
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def search_verses(
    db_path: str | Path,
    query: str,
    *,
    limit: int = 20,
    scope: str | None = None,
    order: str = "canonical",
    style_name: str = "en-sbl",
) -> tuple[list[Verse], int, Versification]:
    """Run a full-text search over verse text.

    Args:
        db_path: Path to the Bible database.
        query: FTS5 MATCH query.
        limit: Maximum number of verses to return.
        scope: Optional Bible reference that restricts the search; it is
            parsed in the database's versification.
        order: ``"canonical"`` for verse (Bible) order, or ``"relevance"`` for
            bm25 rank (best first, tie-broken by verse order).
        style_name: Reference style used to parse ``scope`` and to label output.

    Returns:
        ``(verses, total_matches, db_versification)`` where ``total_matches``
        is the match count before ``limit`` was applied.

    Raises:
        ValueError: If the scope reference or the FTS query is invalid.

    """
    with Database(db_path) as db:
        db_vers = _db_versification(db)

        # None means "search everywhere"; a list restricts to those key ranges.
        key_ranges: list[tuple[int, int]] | None = None
        if scope is not None:
            scope_parser = RefParser(RefStyle.named(style_name), db_vers)
            scope_ref = scope_parser.parse(scope, silent=True)
            if scope_ref is None:
                raise ValueError(f"Could not parse scope reference: {scope!r}")
            key_ranges = list(scope_ref.range_keys())

        try:
            hits = db.search(query, limit, key_ranges, order)
            total_matches = db.count_matches(query, key_ranges)
        except sqlite3.OperationalError as err:
            # SQLite reports malformed MATCH expressions as OperationalError.
            raise ValueError(f"Invalid search query {query!r}: {err}") from err

        return hits, total_matches, db_vers
|